Skip to content

Commit 559b27e

Browse files
authored
Add video encoding tutorial doc (#1063)
1 parent cac99ae commit 559b27e

File tree

5 files changed

+313
-9
lines changed

5 files changed

+313
-9
lines changed

docs/source/api_ref_encoders.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,4 @@ For an audio decoder tutorial, see: :ref:`sphx_glr_generated_examples_encoding_a
1616
:template: class.rst
1717

1818
AudioEncoder
19+
VideoEncoder

docs/source/conf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ def __call__(self, filename):
8787
assert "examples/encoding" in self.src_dir
8888
order = [
8989
"audio_encoding.py",
90+
"video_encoding.py",
9091
]
9192

9293
try:

docs/source/index.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,14 @@ Encoding
9898

9999
How to encode audio samples
100100

101+
.. grid-item-card:: :octicon:`file-code;1em`
102+
Video Encoding
103+
:img-top: _static/img/card-background.svg
104+
:link: generated_examples/encoding/video_encoding.html
105+
:link-type: url
106+
107+
How to encode video frames
108+
101109
.. toctree::
102110
:maxdepth: 1
103111
:caption: TorchCodec documentation
Lines changed: 282 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,282 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
"""
8+
=======================================
9+
Encoding video frames with VideoEncoder
10+
=======================================
11+
12+
In this example, we'll learn how to encode video frames to a file or to raw
13+
bytes using the :class:`~torchcodec.encoders.VideoEncoder` class.
14+
"""
15+
16+
# %%
17+
# First, we'll download a video and decode some frames to tensors.
18+
# These will be the input to the :class:`~torchcodec.encoders.VideoEncoder`. For more details on decoding,
19+
# see :ref:`sphx_glr_generated_examples_decoding_basic_example.py`.
20+
# Otherwise, skip ahead to :ref:`creating_encoder`.
21+
22+
import requests
23+
from torchcodec.decoders import VideoDecoder
24+
from IPython.display import Video
25+
26+
27+
def play_video(encoded_bytes):
28+
return Video(
29+
data=encoded_bytes.numpy().tobytes(),
30+
embed=True,
31+
width=640,
32+
height=360,
33+
mimetype="video/mp4",
34+
)
35+
36+
37+
# Video source: https://www.pexels.com/video/adorable-cats-on-the-lawn-4977395/
38+
# Author: Altaf Shah.
39+
url = "https://videos.pexels.com/video-files/4977395/4977395-hd_1920_1080_24fps.mp4"
40+
41+
response = requests.get(url, headers={"User-Agent": ""})
42+
if response.status_code != 200:
43+
raise RuntimeError(f"Failed to download video. {response.status_code = }.")
44+
45+
raw_video_bytes = response.content
46+
47+
decoder = VideoDecoder(raw_video_bytes)
48+
frames = decoder.get_frames_in_range(0, 60).data # Get first 60 frames
49+
frame_rate = decoder.metadata.average_fps
50+
51+
# %%
52+
# .. _creating_encoder:
53+
#
54+
# Creating an encoder
55+
# -------------------
56+
#
57+
# Let's instantiate a :class:`~torchcodec.encoders.VideoEncoder`. We will need to provide
58+
# the frames to be encoded as a 4D tensor of shape
59+
# ``(num_frames, num_channels, height, width)`` with values in the ``[0, 255]``
60+
# range and ``torch.uint8`` dtype. We will also need to provide the frame rate of the input
61+
# video.
62+
#
63+
# .. note::
64+
#
65+
# The ``frame_rate`` parameter corresponds to the frame rate of the
66+
# *input* video. It will also be used for the frame rate of the *output* encoded video.
67+
from torchcodec.encoders import VideoEncoder
68+
69+
print(f"{frames.shape = }, {frames.dtype = }")
70+
print(f"{frame_rate = } fps")
71+
72+
encoder = VideoEncoder(frames=frames, frame_rate=frame_rate)
73+
74+
# %%
75+
# Encoding to file, bytes, or file-like
76+
# -------------------------------------
77+
#
78+
# :class:`~torchcodec.encoders.VideoEncoder` supports encoding frames into a
79+
# file via the :meth:`~torchcodec.encoders.VideoEncoder.to_file` method, to
80+
# file-like objects via the :meth:`~torchcodec.encoders.VideoEncoder.to_file_like`
81+
# method, or to raw bytes via :meth:`~torchcodec.encoders.VideoEncoder.to_tensor`.
82+
# For now we will use :meth:`~torchcodec.encoders.VideoEncoder.to_tensor`, so we
83+
# can easily inspect and display the encoded video.
84+
85+
encoded_frames = encoder.to_tensor(format="mp4")
86+
play_video(encoded_frames)
87+
88+
# %%
89+
#
90+
# Now that we have encoded data, we can decode it back to verify the
91+
# round-trip encode/decode process works as expected:
92+
93+
decoder_verify = VideoDecoder(encoded_frames)
94+
decoded_frames = decoder_verify.get_frames_in_range(0, 60).data
95+
96+
print(f"Re-decoded video: {decoded_frames.shape = }")
97+
print(f"Original frames: {frames.shape = }")
98+
99+
# %%
100+
# .. _codec_selection:
101+
#
102+
# Codec Selection
103+
# ---------------
104+
#
105+
# By default, the codec used is selected automatically using the file extension provided
106+
# in the ``dest`` parameter for the :meth:`~torchcodec.encoders.VideoEncoder.to_file` method,
107+
# or using the ``format`` parameter for the
108+
# :meth:`~torchcodec.encoders.VideoEncoder.to_file_like` and
109+
# :meth:`~torchcodec.encoders.VideoEncoder.to_tensor` methods.
110+
#
111+
# For example, when encoding to MP4 format, the default codec is typically ``H.264``.
112+
#
113+
# To use a codec other than the default, use the ``codec`` parameter.
114+
# You can specify either a specific codec implementation (e.g., ``"libx264"``)
115+
# or a codec specification (e.g., ``"h264"``). Different codecs offer
116+
# different tradeoffs between quality, file size, and encoding speed.
117+
#
118+
# .. note::
119+
#
120+
# To see available encoders on your system, run ``ffmpeg -encoders``.
121+
#
122+
# Let's encode the same frames using different codecs:
123+
124+
import tempfile
125+
from pathlib import Path
126+
127+
# H.264 encoding
128+
h264_output = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
129+
encoder.to_file(h264_output, codec="libx264")
130+
131+
# H.265 encoding
132+
hevc_output = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
133+
encoder.to_file(hevc_output, codec="hevc")
134+
135+
# Now let's use ffprobe to verify the codec used in the output files
136+
import subprocess
137+
138+
for output, name in [(h264_output, "h264_output"), (hevc_output, "hevc_output")]:
139+
result = subprocess.run(
140+
[
141+
"ffprobe",
142+
"-v",
143+
"error",
144+
"-select_streams",
145+
"v:0",
146+
"-show_entries",
147+
"stream=codec_name",
148+
"-of",
149+
"default=noprint_wrappers=1:nokey=1",
150+
output,
151+
],
152+
capture_output=True,
153+
text=True,
154+
)
155+
print(f"Codec used in {name}: {result.stdout.strip()}")
156+
157+
158+
# %%
159+
# .. _pixel_format:
160+
#
161+
# Pixel Format
162+
# ------------
163+
#
164+
# The ``pixel_format`` parameter controls the color sampling (chroma subsampling)
165+
# of the output video. This affects both quality and file size.
166+
#
167+
# Common pixel formats:
168+
#
169+
# - ``"yuv420p"`` - 4:2:0 chroma subsampling (standard quality, smaller file size, widely compatible)
170+
# - ``"yuv444p"`` - 4:4:4 chroma subsampling (full chroma resolution, higher quality, larger file size)
171+
#
172+
# Most playback devices and platforms support ``yuv420p``, making it the most
173+
# common choice for video encoding.
174+
#
175+
# .. note::
176+
#
177+
# Pixel format support depends on the codec used. Use ``ffmpeg -h encoder=<codec_name>``
178+
# to check available options for your selected codec.
179+
180+
# Standard pixel format
181+
yuv420_encoded_frames = encoder.to_tensor(
182+
format="mp4", codec="libx264", pixel_format="yuv420p"
183+
)
184+
play_video(yuv420_encoded_frames)
185+
186+
# %%
187+
# .. _crf:
188+
#
189+
# CRF (Constant Rate Factor)
190+
# --------------------------
191+
#
192+
# The ``crf`` parameter controls video quality, where lower values produce higher quality output.
193+
#
194+
# For example, with the commonly used H.264 codec, ``libx264``:
195+
#
196+
# - Values range from 0 (lossless) to 51 (worst quality)
197+
# - Values 17 or 18 are considered visually lossless, and the default is 23.
198+
#
199+
# .. note::
200+
#
201+
# The range and interpretation of CRF values depend on the codec used, and
202+
# not all codecs support CRF. Use ``ffmpeg -h encoder=<codec_name>`` to
203+
# check available options for your selected codec.
204+
#
205+
206+
# High quality (low CRF)
207+
high_quality_output = encoder.to_tensor(format="mp4", codec="libx264", crf=0)
208+
play_video(high_quality_output)
209+
210+
# %%
211+
# Low quality (high CRF)
212+
low_quality_output = encoder.to_tensor(format="mp4", codec="libx264", crf=50)
213+
play_video(low_quality_output)
214+
215+
216+
# %%
217+
# .. _preset:
218+
#
219+
# Preset
220+
# ------
221+
#
222+
# The ``preset`` parameter controls the tradeoff between encoding speed and file compression.
223+
# Faster presets encode faster but produce larger files, while slower
224+
# presets take more time to encode but result in better compression.
225+
#
226+
# For example, with the commonly used H.264 codec, ``libx264`` presets include
227+
# ``"ultrafast"`` (fastest), ``"fast"``, ``"medium"`` (default), ``"slow"``, and
228+
# ``"veryslow"`` (slowest, best compression). See the
229+
# `H.264 Video Encoding Guide <https://trac.ffmpeg.org/wiki/Encode/H.264#a2.Chooseapresetandtune>`_
230+
# for additional details.
231+
#
232+
# .. note::
233+
#
234+
# Not all codecs support the ``preset`` option. Use ``ffmpeg -h encoder=<codec_name>``
235+
# to check available options for your selected codec.
236+
#
237+
238+
# Fast encoding with a larger file size
239+
fast_output = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
240+
encoder.to_file(fast_output, codec="libx264", preset="ultrafast")
241+
print(f"Size of fast encoded file: {Path(fast_output).stat().st_size} bytes")
242+
243+
# Slow encoding for a smaller file size
244+
slow_output = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
245+
encoder.to_file(slow_output, codec="libx264", preset="veryslow")
246+
print(f"Size of slow encoded file: {Path(slow_output).stat().st_size} bytes")
247+
248+
# %%
249+
# .. _extra_options:
250+
#
251+
# Extra Options
252+
# -------------
253+
#
254+
# The ``extra_options`` parameter accepts a dictionary of codec-specific options
255+
# that would normally be set via FFmpeg command-line arguments. This enables
256+
# control of encoding settings beyond the common parameters.
257+
#
258+
# For example, some potential extra options for the commonly used H.264 codec, ``libx264``, include:
259+
#
260+
# - ``"g"`` - GOP (Group of Pictures) size / keyframe interval
261+
# - ``"max_b_frames"`` - Maximum number of B-frames between I and P frames
262+
# - ``"tune"`` - Tuning preset (e.g., ``"film"``, ``"animation"``, ``"grain"``)
263+
#
264+
# .. note::
265+
#
266+
# Use ``ffmpeg -h encoder=<codec_name>`` to see all available options for
267+
# a specific codec.
268+
#
269+
270+
271+
custom_output = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
272+
encoder.to_file(
273+
custom_output,
274+
codec="libx264",
275+
extra_options={
276+
"g": 50, # Keyframe every 50 frames
277+
"max_b_frames": 0, # Disable B-frames for faster decoding
278+
"tune": "fastdecode", # Optimize for fast decoding
279+
}
280+
)
281+
282+
# %%

src/torchcodec/encoders/_video_encoder.py

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -51,18 +51,22 @@ def to_file(
5151
codec (str, optional): The codec to use for encoding (e.g., "libx264",
5252
"h264"). If not specified, the default codec
5353
for the container format will be used.
54+
See :ref:`codec_selection` for details.
5455
pixel_format (str, optional): The pixel format for encoding (e.g.,
5556
"yuv420p", "yuv444p"). If not specified, uses codec's default format.
57+
See :ref:`pixel_format` for details.
5658
crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values
57-
mean better quality. Valid range depends on the encoder (commonly 0-51).
59+
mean better quality. Valid range depends on the encoder (e.g. 0-51 for libx264).
5860
Defaults to None (which will use encoder's default).
61+
See :ref:`crf` for details.
5962
preset (str or int, optional): Encoder option that controls the tradeoff between
60-
encoding speed and compression. Valid values depend on the encoder (commonly
63+
encoding speed and compression (output size). Valid values depend on the encoder (commonly
6164
a string: "fast", "medium", "slow"). Defaults to None
6265
(which will use encoder's default).
66+
See :ref:`preset` for details.
6367
extra_options (dict[str, Any], optional): A dictionary of additional
6468
encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``.
65-
Values will be converted to strings before passing to the encoder.
69+
See :ref:`extra_options` for details.
6670
"""
6771
preset = str(preset) if isinstance(preset, int) else preset
6872
_core.encode_video_to_file(
@@ -96,18 +100,22 @@ def to_tensor(
96100
codec (str, optional): The codec to use for encoding (e.g., "libx264",
97101
"h264"). If not specified, the default codec
98102
for the container format will be used.
103+
See :ref:`codec_selection` for details.
99104
pixel_format (str, optional): The pixel format to encode frames into (e.g.,
100105
"yuv420p", "yuv444p"). If not specified, uses codec's default format.
106+
See :ref:`pixel_format` for details.
101107
crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values
102-
mean better quality. Valid range depends on the encoder (commonly 0-51).
108+
mean better quality. Valid range depends on the encoder (e.g. 0-51 for libx264).
103109
Defaults to None (which will use encoder's default).
110+
See :ref:`crf` for details.
104111
preset (str or int, optional): Encoder option that controls the tradeoff between
105-
encoding speed and compression. Valid values depend on the encoder (commonly
112+
encoding speed and compression (output size). Valid values depend on the encoder (commonly
106113
a string: "fast", "medium", "slow"). Defaults to None
107114
(which will use encoder's default).
115+
See :ref:`preset` for details.
108116
extra_options (dict[str, Any], optional): A dictionary of additional
109117
encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``.
110-
Values will be converted to strings before passing to the encoder.
118+
See :ref:`extra_options` for details.
111119
112120
Returns:
113121
Tensor: The raw encoded bytes as 1D uint8 Tensor.
@@ -150,18 +158,22 @@ def to_file_like(
150158
codec (str, optional): The codec to use for encoding (e.g., "libx264",
151159
"h264"). If not specified, the default codec
152160
for the container format will be used.
161+
See :ref:`codec_selection` for details.
153162
pixel_format (str, optional): The pixel format for encoding (e.g.,
154163
"yuv420p", "yuv444p"). If not specified, uses codec's default format.
164+
See :ref:`pixel_format` for details.
155165
crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values
156-
mean better quality. Valid range depends on the encoder (commonly 0-51).
166+
mean better quality. Valid range depends on the encoder (e.g. 0-51 for libx264).
157167
Defaults to None (which will use encoder's default).
168+
See :ref:`crf` for details.
158169
preset (str or int, optional): Encoder option that controls the tradeoff between
159-
encoding speed and compression. Valid values depend on the encoder (commonly
170+
encoding speed and compression (output size). Valid values depend on the encoder (commonly
160171
a string: "fast", "medium", "slow"). Defaults to None
161172
(which will use encoder's default).
173+
See :ref:`preset` for details.
162174
extra_options (dict[str, Any], optional): A dictionary of additional
163175
encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``.
164-
Values will be converted to strings before passing to the encoder.
176+
See :ref:`extra_options` for details.
165177
"""
166178
preset = str(preset) if isinstance(preset, int) else preset
167179
_core.encode_video_to_file_like(

0 commit comments

Comments
 (0)