From d589a1e9d2c63eabbf0888ba45145d256751d47d Mon Sep 17 00:00:00 2001 From: Ben Cherry Date: Thu, 12 Dec 2024 17:44:28 -0800 Subject: [PATCH] Fix center_aspect_fit, add center_aspect_scale resizing option (#1222) --- .changeset/tiny-papayas-film.md | 9 +++++ .../livekit/agents/utils/images/image.py | 40 +++++++++++++++++-- .../livekit/plugins/anthropic/llm.py | 2 +- .../plugins/openai/beta/assistant_llm.py | 2 +- .../livekit/plugins/openai/utils.py | 2 +- 5 files changed, 49 insertions(+), 6 deletions(-) create mode 100644 .changeset/tiny-papayas-film.md diff --git a/.changeset/tiny-papayas-film.md b/.changeset/tiny-papayas-film.md new file mode 100644 index 0000000000..07ccea04cb --- /dev/null +++ b/.changeset/tiny-papayas-film.md @@ -0,0 +1,9 @@ +--- +"livekit-agents": patch +"livekit-plugins-anthropic": patch +"livekit-plugins-openai": patch +--- + +Fix center_aspect_fit bug, add scale_aspect_fit and scale_aspect_fill resizing options. + +Make scale_aspect_fit the new default resizing option for video frames. 
diff --git a/livekit-agents/livekit/agents/utils/images/image.py b/livekit-agents/livekit/agents/utils/images/image.py index 15755284d2..bcc0a5b5f5 100644 --- a/livekit-agents/livekit/agents/utils/images/image.py +++ b/livekit-agents/livekit/agents/utils/images/image.py @@ -33,7 +33,18 @@ class EncodeOptions: class ResizeOptions: width: int height: int - strategy: Literal["center_aspect_fit", "center_aspect_cover", "skew"] + strategy: Literal[ + # Fit the image into the provided dimensions, with letterboxing + "center_aspect_fit", + # Fill the provided dimensions, with cropping + "center_aspect_cover", + # Fit the image into the provided dimensions, preserving its original aspect ratio + "scale_aspect_fit", + # Fill the provided dimensions, preserving its original aspect ratio (image will be larger than the provided dimensions) + "scale_aspect_fill", + # Precisely resize the image to the provided dimensions + "skew", + ] def import_pil(): @@ -83,10 +94,11 @@ def _resize_image(image: Any, options: EncodeOptions): # If the new image is wider than the original if resize_opts.width / resize_opts.height > image.width / image.height: - new_width = resize_opts.width - new_height = int(image.height * (resize_opts.width / image.width)) + new_height = resize_opts.height + new_width = int(image.width * (resize_opts.height / image.height)) resized = image.resize((new_width, new_height)) + Image.Image.paste( result, resized, @@ -118,5 +130,27 @@ def _resize_image(image: Any, options: EncodeOptions): ), ) return result + elif resize_opts.strategy == "scale_aspect_fill": + # Start with assuming width is the limiting dimension + new_width = resize_opts.width + new_height = int(image.height * (resize_opts.width / image.width)) + + # If height is under the limit, scale based on height instead + if new_height < resize_opts.height: + new_height = resize_opts.height + new_width = int(image.width * (resize_opts.height / image.height)) + + return image.resize((new_width, new_height)) +
elif resize_opts.strategy == "scale_aspect_fit": + # Start with assuming width is the limiting dimension + new_width = resize_opts.width + new_height = int(image.height * (resize_opts.width / image.width)) + + # If height would exceed the limit, scale based on height instead + if new_height > resize_opts.height: + new_height = resize_opts.height + new_width = int(image.width * (resize_opts.height / image.height)) + + return image.resize((new_width, new_height)) raise ValueError(f"Unknown resize strategy: {resize_opts.strategy}") diff --git a/livekit-plugins/livekit-plugins-anthropic/livekit/plugins/anthropic/llm.py b/livekit-plugins/livekit-plugins-anthropic/livekit/plugins/anthropic/llm.py index d9e1b2a0dd..644d8432e0 100644 --- a/livekit-plugins/livekit-plugins-anthropic/livekit/plugins/anthropic/llm.py +++ b/livekit-plugins/livekit-plugins-anthropic/livekit/plugins/anthropic/llm.py @@ -441,7 +441,7 @@ def _build_anthropic_image_content( opts.resize_options = utils.images.ResizeOptions( width=image.inference_width, height=image.inference_height, - strategy="center_aspect_fit", + strategy="scale_aspect_fit", ) encoded_data = utils.images.encode(image.image, opts) diff --git a/livekit-plugins/livekit-plugins-openai/livekit/plugins/openai/beta/assistant_llm.py b/livekit-plugins/livekit-plugins-openai/livekit/plugins/openai/beta/assistant_llm.py index a5e3fe2296..7df336e890 100644 --- a/livekit-plugins/livekit-plugins-openai/livekit/plugins/openai/beta/assistant_llm.py +++ b/livekit-plugins/livekit-plugins-openai/livekit/plugins/openai/beta/assistant_llm.py @@ -532,7 +532,7 @@ async def _upload_frame( opts.resize_options = utils.images.ResizeOptions( width=inference_width, height=inference_height, - strategy="center_aspect_fit", + strategy="scale_aspect_fit", ) encoded_data = utils.images.encode(frame, opts) diff --git a/livekit-plugins/livekit-plugins-openai/livekit/plugins/openai/utils.py b/livekit-plugins/livekit-plugins-openai/livekit/plugins/openai/utils.py 
index 4e8d40c00e..ed830e59e4 100644 --- a/livekit-plugins/livekit-plugins-openai/livekit/plugins/openai/utils.py +++ b/livekit-plugins/livekit-plugins-openai/livekit/plugins/openai/utils.py @@ -78,7 +78,7 @@ def _build_oai_image_content(image: llm.ChatImage, cache_key: Any): opts.resize_options = utils.images.ResizeOptions( width=image.inference_width, height=image.inference_height, - strategy="center_aspect_fit", + strategy="scale_aspect_fit", ) encoded_data = utils.images.encode(image.image, opts)