Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FEAT Video Converter: Adding Images to Videos #702

Open
wants to merge 21 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions doc/code/converters/7_video_converters.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Adding Images to a Video"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\bjagdagdorj\\.conda\\envs\\pyrit2\\Lib\\site-packages\\fpdf\\__init__.py:40: UserWarning: You have both PyFPDF & fpdf2 installed. Both packages cannot be installed at the same time as they share the same module namespace. To only keep fpdf2, run: pip uninstall --yes pypdf && pip install --upgrade fpdf2\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"NO filename provided, using default\n",
"Output video filename: C:\\Users\\bjagdagdorj\\Documents\\tools\\pyrit2\\PyRIT\\dbdata\\prompt-memory-entries\\videos\\1740610481714274.mp4\n"
]
},
{
"data": {
"text/plain": [
"ConverterResult(output_text='C:\\\\Users\\\\bjagdagdorj\\\\Documents\\\\tools\\\\pyrit2\\\\PyRIT\\\\dbdata\\\\prompt-memory-entries\\\\videos\\\\1740610481714274.mp4', output_type='video_path')"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pathlib\n",
"\n",
"from pyrit.common import IN_MEMORY, initialize_pyrit\n",
"from pyrit.prompt_converter import AddImageVideoConverter\n",
"\n",
"initialize_pyrit(memory_db_type=IN_MEMORY)\n",
"\n",
"input_video = str(pathlib.Path(\".\") / \"..\" / \"..\" / \"..\" / \"assets\" / \"sample_video.mp4\")\n",
"output_video = str(pathlib.Path(\".\") / \"..\" / \"..\" / \"..\" / \"assets\" / \"sample_output_video.mp4\")\n",
"input_image = str(pathlib.Path(\".\") / \"..\" / \"..\" / \"..\" / \"assets\" / \"ice_cream.png\")\n",
"\n",
"video = AddImageVideoConverter(video_path=input_video, img_resize_size=(100,100))\n",
"converted_vid = await video.convert_async(prompt=input_image, input_type=\"image_path\")\n",
"converted_vid"
]
}
],
"metadata": {
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
28 changes: 28 additions & 0 deletions doc/code/converters/7_video_converters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# ---
# jupyter:
# jupytext:
# text_representation:
# extension: .py
# format_name: percent
# format_version: '1.3'
# jupytext_version: 1.16.4
# ---

# %% [markdown]
# # Adding Images to a Video

# %%
import pathlib

from pyrit.common import IN_MEMORY, initialize_pyrit
from pyrit.prompt_converter import AddImageVideoConverter

# Use the in-memory database so the demo leaves no persistent memory entries behind.
initialize_pyrit(memory_db_type=IN_MEMORY)

# Sample assets live three directories above this notebook, under "assets".
input_video = str(pathlib.Path(".", "..", "..", "..", "assets", "sample_video.mp4"))
output_video = str(pathlib.Path(".", "..", "..", "..", "assets", "sample_output_video.mp4"))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this output_video path isn't being used right now, make sure to include it in the converter init on L26

input_image = str(pathlib.Path(".") / ".." / ".." / ".." / "assets" / "ice_cream.png")

video = AddImageVideoConverter(video_path=input_video, img_resize_size=(100, 100))
converted_vid = await video.convert_async(prompt=input_image, input_type="image_path")
converted_vid
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ dependencies = [
"ipykernel>=6.29.4",
"numpy>=1.26.4",
"openai>=1.58.1",
"opencv-python>=4.11.0.86",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you figure out what this pulls in? Might be a rather heavyweight package. If you're not sure how please lmk and we can do it together 🙂

"pillow>=10.3.0",
"pydantic>=2.7.1",
"pyodbc>=5.1.0",
Expand Down
4 changes: 3 additions & 1 deletion pyrit/prompt_converter/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@
from pyrit.prompt_converter.prompt_converter import ConverterResult, PromptConverter

from pyrit.prompt_converter.add_image_text_converter import AddImageTextConverter
from pyrit.prompt_converter.add_image_to_video_converter import AddImageVideoConverter
from pyrit.prompt_converter.add_text_image_converter import AddTextImageConverter
from pyrit.prompt_converter.ansi_escape.ansi_attack_converter import AnsiAttackConverter
from pyrit.prompt_converter.ascii_art_converter import AsciiArtConverter
from pyrit.prompt_converter.ascii_smuggler_converter import AsciiSmugglerConverter
from pyrit.prompt_converter.atbash_converter import AtbashConverter
from pyrit.prompt_converter.audio_frequency_converter import AudioFrequencyConverter
from pyrit.prompt_converter.audio_frequency_converter import AudioFrequencyConverter
from pyrit.prompt_converter.azure_speech_audio_to_text_converter import AzureSpeechAudioToTextConverter
from pyrit.prompt_converter.azure_speech_text_to_audio_converter import AzureSpeechTextToAudioConverter
from pyrit.prompt_converter.base64_converter import Base64Converter
Expand Down Expand Up @@ -57,6 +58,7 @@

__all__ = [
"AddImageTextConverter",
"AddImageVideoConverter",
"AddTextImageConverter",
"AnsiAttackConverter",
"AsciiArtConverter",
Expand Down
172 changes: 172 additions & 0 deletions pyrit/prompt_converter/add_image_to_video_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import logging
import os
from pathlib import Path

import cv2
import numpy as np

from pyrit.common.path import DB_DATA_PATH
from pyrit.models import PromptDataType, data_serializer_factory
from pyrit.prompt_converter import ConverterResult, PromptConverter

logger = logging.getLogger(__name__)

# Maps a lowercase video file extension (without the leading dot) to the FourCC
# codec string that is unpacked into cv2.VideoWriter_fourcc when the output video
# is written. Extensions that are not listed here are rejected with a ValueError.
video_encoding_map = {
    "mp4": "mp4v",
    "avi": "XVID",
    "mov": "MJPG",
    "mkv": "X264",
}


class AddImageVideoConverter(PromptConverter):
    """
    Adds an image to a video at a specified position.

    The image is currently overlaid on every frame of the video, not at a specific timepoint.

    Args:
        video_path (str): File path of video to add image to
        output_path (str, Optional): File path of output video. Defaults to None, in which case
            a filename is generated by the video data serializer.
        img_position (tuple, Optional): (x, y) position of the image's top-left corner in the
            video frame. Defaults to (10, 10).
        img_resize_size (tuple, Optional): (width, height) to resize the image to before it is
            overlaid. Defaults to (500, 500).

    Raises:
        ValueError: If no video path is provided.
    """

    def __init__(
        self,
        video_path: str,
        output_path: str = None,
        img_position: tuple = (10, 10),
        img_resize_size: tuple = (500, 500),
    ):
        if not video_path:
            raise ValueError("Please provide valid video path")

        self._output_path = output_path
        self._img_position = img_position
        self._img_resize_size = img_resize_size
        self._video_path = video_path

    async def _add_image_to_video(self, image_path: str, output_path: str) -> str:
        """
        Overlays the image on every frame of the configured video and writes the result.

        Args:
            image_path (str): Path of the image to overlay on the video.
            output_path (str): Path the resulting video is written to.

        Returns:
            str: The path of the written output video (same value as ``output_path``).

        Raises:
            ValueError: If the image path is empty, the video format is unsupported, the video
                cannot be opened, the image cannot be decoded, or the image position lies
                outside the video frame.
        """
        if not image_path:
            raise ValueError("Please provide valid image path value")

        input_image_data = data_serializer_factory(
            category="prompt-memory-entries", data_type="image_path", value=image_path
        )
        input_video_data = data_serializer_factory(
            category="prompt-memory-entries", data_type="video_path", value=self._video_path
        )

        # Reading the video up front fails early when it does not exist
        video_bytes = await input_video_data.read_data()

        video_path = self._video_path
        local_temp_path = None

        # Initialised before the try block so the cleanup in `finally` never hits an unbound
        # name and never masks the exception that is actually being raised.
        cap = None
        out = None

        try:
            if input_video_data._is_azure_storage_url(self._video_path):
                # OpenCV needs a local file, so videos in Azure storage are written to a
                # temporary file first
                local_temp_path = Path(DB_DATA_PATH, "temp_video.mp4")
                with open(local_temp_path, "wb") as f:
                    f.write(video_bytes)
                video_path = str(local_temp_path)

            file_extension = video_path.split(".")[-1].lower()
            if file_extension not in video_encoding_map:
                raise ValueError(f"Unsupported video format: {file_extension}")

            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                raise ValueError(f"Unable to open video: {video_path}")

            # Get video properties. FPS is kept as a float so fractional frame rates
            # (e.g. 29.97) are not truncated in the output video.
            fps = cap.get(cv2.CAP_PROP_FPS)
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

            x, y = self._img_position  # Position where the overlay will be placed
            if not (0 <= x < width and 0 <= y < height):
                raise ValueError(
                    f"Image position {self._img_position} is outside the video frame ({width}x{height})"
                )

            # Load and resize the overlay image
            input_image_bytes = await input_image_data.read_data()
            image_np_arr = np.frombuffer(input_image_bytes, np.uint8)
            overlay = cv2.imdecode(image_np_arr, cv2.IMREAD_UNCHANGED)
            if overlay is None:
                raise ValueError(f"Unable to decode image: {image_path}")
            if overlay.ndim == 2:
                # Grayscale images decode to a 2D array; expand to BGR to match the frames
                overlay = cv2.cvtColor(overlay, cv2.COLOR_GRAY2BGR)
            overlay = cv2.resize(overlay, self._img_resize_size)

            # Ensure overlay fits within the frame boundaries. Done once, before the frame loop,
            # and the dimensions are refreshed so the frame slice matches the resized overlay.
            h, w = overlay.shape[:2]
            if x + w > width or y + h > height:
                logger.warning("Overlay image is too large for the video frame. Resizing to fit.")
                overlay = cv2.resize(overlay, (min(w, width - x), min(h, height - y)))
                h, w = overlay.shape[:2]

            # IMREAD_UNCHANGED keeps an alpha channel when present; split it off so the
            # overlay can be alpha-blended onto the 3-channel video frames.
            alpha = None
            if overlay.shape[2] == 4:
                alpha = overlay[:, :, 3:4].astype(np.float32) / 255.0
                overlay = overlay[:, :, :3]
                weighted_overlay = alpha * overlay
                inverse_alpha = 1.0 - alpha

            video_char_code = cv2.VideoWriter_fourcc(*video_encoding_map[file_extension])
            out = cv2.VideoWriter(output_path, video_char_code, fps, (width, height))

            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                # `region` is a view into `frame`, so writing to it modifies the frame in place
                region = frame[y : y + h, x : x + w]
                if alpha is None:
                    region[:] = overlay
                else:
                    region[:] = (weighted_overlay + inverse_alpha * region).astype(np.uint8)

                # Write the modified frame to the output video
                out.write(frame)

        finally:
            # Release everything that was actually acquired
            if cap is not None:
                cap.release()
            if out is not None:
                out.release()
            if local_temp_path is not None and local_temp_path.exists():
                os.remove(local_temp_path)

        logger.info(f"Video saved as {output_path}")

        return output_path

    async def convert_async(self, *, prompt: str, input_type: PromptDataType = "image_path") -> ConverterResult:
        """
        Converter that overlays the input image on the configured video.

        Args:
            prompt (str): The image file name to be added to the video.
            input_type (PromptDataType): type of data
        Returns:
            ConverterResult: The filename of the converted video as a ConverterResult Object
        Raises:
            ValueError: If the input type is not supported.
        """
        if not self.input_supported(input_type):
            raise ValueError("Input type not supported")

        output_video_serializer = data_serializer_factory(category="prompt-memory-entries", data_type="video_path")

        if not self._output_path:
            output_video_serializer.value = await output_video_serializer.get_data_filename()
        else:
            output_video_serializer.value = self._output_path

        # Add the image to the video
        updated_video = await self._add_image_to_video(image_path=prompt, output_path=output_video_serializer.value)
        return ConverterResult(output_text=str(updated_video), output_type="video_path")

    def input_supported(self, input_type: PromptDataType) -> bool:
        """Returns True only for the "image_path" input type."""
        return input_type == "image_path"

    def output_supported(self, output_type: PromptDataType) -> bool:
        """Returns True only for the "video_path" output type."""
        return output_type == "video_path"
Loading