Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FEAT Video Converter: Adding Images to Videos #702

Open
wants to merge 21 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 80 additions & 0 deletions doc/code/converters/7_video_converters.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Adding Images to a Video"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\bjagdagdorj\\.conda\\envs\\pyrit2\\Lib\\site-packages\\fpdf\\__init__.py:40: UserWarning: You have both PyFPDF & fpdf2 installed. Both packages cannot be installed at the same time as they share the same module namespace. To only keep fpdf2, run: pip uninstall --yes pypdf && pip install --upgrade fpdf2\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Video saved as ..\\..\\..\\assets\\sample_output_video.mp4\n"
]
},
{
"data": {
"text/plain": [
"ConverterResult(output_text='..\\\\..\\\\..\\\\assets\\\\sample_output_video.mp4', output_type='video_path')"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pathlib\n",
"\n",
"from pyrit.common.path import DATASETS_PATH\n",
"from pyrit.prompt_converter import AddImageVideoConverter\n",
"\n",
"input_video = str(pathlib.Path(\".\") / \"..\" / \"..\" / \"..\" / \"assets\" / \"sample_video.mp4\")\n",
"output_video = str(pathlib.Path(\".\") / \"..\" / \"..\" / \"..\" / \"assets\" / \"sample_output_video.mp4\")\n",
"input_image = str(pathlib.Path(\".\") / \"..\" / \"..\" / \"..\" / \"assets\" / \"ice_cream.png\")\n",
"# overlay_image_on_video(input_video, input_image, output_video, position=(30, 30), resize_to=(400, 400))\n",
"\n",
"\n",
"video = AddImageVideoConverter(video_path=input_video, output_path=output_video)\n",
"converted_vid = await video.convert_async(prompt=input_image, input_type=\"image_path\")\n",
"converted_vid"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "pyrit2",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ dependencies = [
"ipykernel>=6.29.4",
"numpy>=1.26.4",
"openai>=1.58.1",
"opencv-python>=4.11.0.86",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you figure out what this pulls in? It might be a rather heavyweight package. If you're not sure how, please let me know and we can do it together 🙂

"pillow>=10.3.0",
"pydantic>=2.7.1",
"pyodbc>=5.1.0",
Expand Down
4 changes: 3 additions & 1 deletion pyrit/prompt_converter/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@
from pyrit.prompt_converter.prompt_converter import ConverterResult, PromptConverter

from pyrit.prompt_converter.add_image_text_converter import AddImageTextConverter
from pyrit.prompt_converter.add_image_to_video_converter import AddImageVideoConverter
from pyrit.prompt_converter.add_text_image_converter import AddTextImageConverter
from pyrit.prompt_converter.ansi_escape.ansi_attack_converter import AnsiAttackConverter
from pyrit.prompt_converter.ascii_art_converter import AsciiArtConverter
from pyrit.prompt_converter.atbash_converter import AtbashConverter
from pyrit.prompt_converter.audio_frequency_converter import AudioFrequencyConverter
from pyrit.prompt_converter.audio_frequency_converter import AudioFrequencyConverter
from pyrit.prompt_converter.azure_speech_audio_to_text_converter import AzureSpeechAudioToTextConverter
from pyrit.prompt_converter.azure_speech_text_to_audio_converter import AzureSpeechTextToAudioConverter
from pyrit.prompt_converter.base64_converter import Base64Converter
Expand Down Expand Up @@ -53,6 +54,7 @@

__all__ = [
"AddImageTextConverter",
"AddImageVideoConverter",
"AddTextImageConverter",
"AnsiAttackConverter",
"AsciiArtConverter",
Expand Down
129 changes: 129 additions & 0 deletions pyrit/prompt_converter/add_image_to_video_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import logging
import os

import cv2

from pyrit.models import PromptDataType, data_serializer_factory
from pyrit.prompt_converter import ConverterResult, PromptConverter

logger = logging.getLogger(__name__)


class AddImageVideoConverter(PromptConverter):
    """
    Adds an image to every frame of a video at a specified position.

    The image to overlay is supplied as the prompt of ``convert_async``; the
    video it is added to is fixed at construction time.

    Args:
        video_path (str): File path of the video to add the image to.
        output_path (str, Optional): File path of the output video. When not
            provided, a file name is requested from the ``video_path`` data
            serializer. Defaults to None.
        img_position (tuple, Optional): ``(x, y)`` pixel offset of the image's
            top-left corner within each frame. Defaults to (10, 10).
        img_resize_size (tuple, Optional): ``(width, height)`` the image is
            resized to before being placed. Defaults to (500, 500).

    Raises:
        ValueError: If ``video_path`` is empty.
    """

    def __init__(
        self,
        video_path: str,
        output_path: str = None,
        img_position: tuple = (10, 10),
        img_resize_size: tuple = (500, 500),
    ):
        if not video_path:
            raise ValueError("Please provide valid video path")

        self.output_path = output_path
        self.img_position = img_position
        self.img_resize_size = img_resize_size
        self._video_path = video_path

    def _load_overlay(self, image_path: str, frame_width: int, frame_height: int):
        """
        Loads the overlay image and prepares it for pasting into a frame.

        Args:
            image_path (str): Path of the image to load.
            frame_width (int): Width of the video frames in pixels.
            frame_height (int): Height of the video frames in pixels.

        Returns:
            tuple: ``(overlay, alpha)`` where ``overlay`` is a 3-channel array
            that fits inside the frame at ``img_position`` and ``alpha`` is
            either None (opaque image) or a float array of shape (h, w, 1)
            with values in [0, 1].

        Raises:
            ValueError: If the position lies outside the frame or the image
                cannot be decoded.
        """
        x, y = self.img_position
        if not (0 <= x < frame_width and 0 <= y < frame_height):
            raise ValueError("Image position is outside of the video frame")

        overlay = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
        if overlay is None:
            # cv2.imread signals an unreadable/unsupported file by returning None.
            raise ValueError("Image could not be read")

        overlay = cv2.resize(overlay, self.img_resize_size)

        # Shrink only the dimension(s) that overflow the frame so that the
        # overlay always has exactly the shape of the region it is written to.
        fit_width = min(overlay.shape[1], frame_width - x)
        fit_height = min(overlay.shape[0], frame_height - y)
        if (fit_width, fit_height) != (overlay.shape[1], overlay.shape[0]):
            logger.warning("Overlay image is too large for the video frame. Resizing to fit.")
            overlay = cv2.resize(overlay, (fit_width, fit_height))

        # IMREAD_UNCHANGED keeps the file's channel layout, so normalise it to
        # 3 colour channels (+ optional alpha) before it meets a video frame.
        if overlay.ndim == 2:
            return cv2.cvtColor(overlay, cv2.COLOR_GRAY2BGR), None
        if overlay.shape[2] == 4:
            alpha = overlay[:, :, 3:4].astype("float32") / 255.0
            return overlay[:, :, :3], alpha
        return overlay, None

    def _add_image_to_video(self, image_path: str, output_path: str) -> str:
        """
        Overlays the image on every frame of the video and writes the result.

        Args:
            image_path (str): Path of the image to add to the video.
            output_path (str): Path the resulting video is written to.

        Returns:
            str: ``output_path``, the location of the written video.

        Raises:
            ValueError: If a path is missing or does not exist, the video or
                image cannot be read, or the image position is outside the frame.
            RuntimeError: If the output video cannot be opened for writing.
        """
        if not image_path:
            raise ValueError("Please provide valid image path value")

        if not os.path.exists(image_path):
            logger.error("Image path does not exist: %s", image_path)
            raise ValueError("Image path does not exist")

        if not os.path.exists(self._video_path):
            logger.error("Video path does not exist: %s", self._video_path)
            raise ValueError("Video path does not exist")

        capture = cv2.VideoCapture(self._video_path)
        writer = None
        try:
            if not capture.isOpened():
                raise ValueError("Video could not be opened")

            # Keep the frame rate as a float: truncating e.g. 29.97 to 29
            # changes the playback speed/duration of the output.
            fps = capture.get(cv2.CAP_PROP_FPS)
            width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))

            # Prepare the overlay before creating the writer so that a bad
            # image does not leave an empty output file behind.
            overlay, alpha = self._load_overlay(image_path, width, height)
            overlay_height, overlay_width = overlay.shape[:2]
            x, y = self.img_position
            rows = slice(y, y + overlay_height)
            cols = slice(x, x + overlay_width)

            if alpha is not None:
                # Loop-invariant parts of the blend: alpha * overlay and (1 - alpha).
                weighted_overlay = alpha * overlay.astype("float32")
                background_weight = 1.0 - alpha

            fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # Codec for MP4
            writer = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
            if not writer.isOpened():
                raise RuntimeError(f"Could not open output video for writing: {output_path}")

            while True:
                has_frame, frame = capture.read()
                if not has_frame:
                    break

                # NOTE(review): assumes decoded frames are 3-channel BGR of the
                # reported width/height - confirm for unusual inputs.
                if alpha is None:
                    frame[rows, cols] = overlay
                else:
                    blended = weighted_overlay + background_weight * frame[rows, cols]
                    frame[rows, cols] = blended.astype(frame.dtype)

                writer.write(frame)
        finally:
            # Release handles even when an error interrupts processing.
            # No cv2.destroyAllWindows() here: this code never opens a window.
            capture.release()
            if writer is not None:
                writer.release()

        logger.info("Video saved as %s", output_path)

        return output_path

    async def convert_async(self, *, prompt: str, input_type: PromptDataType = "image_path") -> ConverterResult:
        """
        Converter that overlays the input image on the configured video.

        Args:
            prompt (str): The file path of the image to be added to the video.
            input_type (PromptDataType): Type of the prompt; only "image_path" is supported.

        Returns:
            ConverterResult: The file path of the converted video, with output type "video_path".

        Raises:
            ValueError: If ``input_type`` is not supported.
        """
        if not self.input_supported(input_type):
            raise ValueError("Input type not supported")

        output_video_serializer = data_serializer_factory(category="prompt-memory-entries", data_type="video_path")

        # Use the caller's output path when given, otherwise let the
        # serializer choose a file name.
        if not self.output_path:
            output_video_serializer.value = output_video_serializer.get_data_filename()
        else:
            output_video_serializer.value = self.output_path

        # Add the image to the video
        updated_video = self._add_image_to_video(image_path=prompt, output_path=output_video_serializer.value)
        return ConverterResult(output_text=str(updated_video), output_type="video_path")

    def input_supported(self, input_type: PromptDataType) -> bool:
        """Returns True only for the "image_path" input type."""
        return input_type == "image_path"
Loading