Skip to content

FEAT Video Converter: Adding Images to Videos #702

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 28 commits into from
Mar 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
1367ae0
initial commit adding new files
Feb 10, 2025
48e3c9e
adding converter file
Feb 10, 2025
a24a804
adding converter file
Feb 10, 2025
c425af0
removing testing code
Feb 10, 2025
0997c53
pre-commit
Feb 10, 2025
40e9b00
Merge branch 'main' of https://github.com/Azure/PyRIT into users/bjag…
Feb 25, 2025
841a7e3
Merge branch 'main' of https://github.com/Azure/PyRIT into users/bjag…
Feb 26, 2025
a598b14
working version of video converter still draft though
Feb 26, 2025
cb15543
working version of video converter still draft though
Feb 26, 2025
c77e9e8
addressed feedback
Feb 26, 2025
e8229f2
Merge branch 'main' of https://github.com/Azure/PyRIT into users/bjag…
Feb 26, 2025
419d7da
addressed feedback, fix doc strings, format, minor edits
Feb 27, 2025
39953eb
Merge branch 'main' of https://github.com/Azure/PyRIT into users/bjag…
Mar 1, 2025
e8cdb43
added unit tests, addressed formatting changes
Mar 2, 2025
023f8bf
remove print statement
Mar 2, 2025
81870a0
adding new notebook to toc
Mar 2, 2025
5f16d2b
adding sample video and formatting
Mar 3, 2025
60e4d18
Merge branch 'main' of https://github.com/Azure/PyRIT into users/bjag…
Mar 3, 2025
ddc1728
adding comment and getting latest code
Mar 3, 2025
eea9c3b
make opencv optional dependency
Mar 4, 2025
cafb2b1
make opencv optional dependency
Mar 4, 2025
77c08d0
made opencv optional dependency
Mar 4, 2025
c198aae
pre-commit
Mar 4, 2025
b4cc7f0
Merge branch 'main' of https://github.com/Azure/PyRIT into users/bjag…
Mar 4, 2025
97b433a
minor comment change
Mar 4, 2025
893b99e
precommit
Mar 4, 2025
b2f73a7
precommit
Mar 4, 2025
db10632
precommit
Mar 4, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added assets/sample_video.mp4
Binary file not shown.
1 change: 1 addition & 0 deletions doc/_toc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ chapters:
- file: code/converters/4_image_converters
- file: code/converters/5_selectively_converting
- file: code/converters/6_human_converter
- file: code/converters/7_video_converters
- file: code/converters/ansi_attack_converter
- file: code/converters/char_swap_attack_generator
- file: code/converters/pdf_converter
Expand Down
63 changes: 63 additions & 0 deletions doc/code/converters/7_video_converters.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Adding Images to a Video\n",
"\n",
"Adds an image to a video.\n",
"To use this converter you'll need to install opencv which can be done with \n",
"`pip install pyrit[opencv]`"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"ConverterResult(output_text='C:\\\\Users\\\\bjagdagdorj\\\\Documents\\\\tools\\\\pyrit2\\\\PyRIT\\\\dbdata\\\\prompt-memory-entries\\\\videos\\\\1741114936092652.mp4', output_type='video_path')"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pathlib\n",
"\n",
"from pyrit.common import IN_MEMORY, initialize_pyrit\n",
"from pyrit.prompt_converter import AddImageVideoConverter\n",
"\n",
"initialize_pyrit(memory_db_type=IN_MEMORY)\n",
"\n",
"input_video = str(pathlib.Path(\".\") / \"..\" / \"..\" / \"..\" / \"assets\" / \"sample_video.mp4\")\n",
"input_image = str(pathlib.Path(\".\") / \"..\" / \"..\" / \"..\" / \"assets\" / \"pyrit_architecture.png\")\n",
"\n",
"video = AddImageVideoConverter(video_path=input_video)\n",
"converted_vid = await video.convert_async(prompt=input_image, input_type=\"image_path\") # type: ignore\n",
"converted_vid"
]
}
],
"metadata": {
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
31 changes: 31 additions & 0 deletions doc/code/converters/7_video_converters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# ---
# jupyter:
# jupytext:
# text_representation:
# extension: .py
# format_name: percent
# format_version: '1.3'
# jupytext_version: 1.16.4
# ---

# %% [markdown]
# # Adding Images to a Video
#
# Adds an image to a video.
# To use this converter you'll need to install opencv which can be done with
# `pip install pyrit[opencv]`

# %%
import pathlib

from pyrit.common import IN_MEMORY, initialize_pyrit
from pyrit.prompt_converter import AddImageVideoConverter

initialize_pyrit(memory_db_type=IN_MEMORY)

input_video = str(pathlib.Path(".") / ".." / ".." / ".." / "assets" / "sample_video.mp4")
input_image = str(pathlib.Path(".") / ".." / ".." / ".." / "assets" / "pyrit_architecture.png")

video = AddImageVideoConverter(video_path=input_video)
converted_vid = await video.convert_async(prompt=input_image, input_type="image_path") # type: ignore
converted_vid
10 changes: 7 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -114,19 +114,26 @@ playwright = [
"ollama>=0.4.4"
]

opencv = [
"opencv-python>=4.11.0.86",
]
all = [
"accelerate==0.34.2",
"azureml-mlflow==1.57.0",
"black>=24.4.0",
"flake8>=7.0.0",
"flake8-copyright>=0.2.4",
"flask>=3.1.0",
"jupyter-book>=1.0.2",
"jupytext>=1.16.1",
"mlflow==2.16.2",
"ml-collections==0.1.1",
"mypy>=1.9.0",
"mock-alchemy>=0.2.6",
"numpy<2",
"ollama>=0.4.4",
"opencv-python>=4.11.0.86",
"playwright==1.49.0",
"pre-commit>=3.3.3",
"pytest>=7.3.1",
"pytest-asyncio>=0.23.5",
Expand All @@ -135,9 +142,6 @@ all = [
"semantic-kernel>=1.20.0",
"sentencepiece==0.2.0",
"torch>=2.3.0",
"playwright==1.49.0",
"flask>=3.1.0",
"ollama>=0.4.4",
"types-PyYAML>=6.0.12.9",
]

Expand Down
2 changes: 2 additions & 0 deletions pyrit/prompt_converter/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from pyrit.prompt_converter.prompt_converter import ConverterResult, PromptConverter

from pyrit.prompt_converter.add_image_text_converter import AddImageTextConverter
from pyrit.prompt_converter.add_image_to_video_converter import AddImageVideoConverter
from pyrit.prompt_converter.add_text_image_converter import AddTextImageConverter
from pyrit.prompt_converter.ansi_escape.ansi_attack_converter import AnsiAttackConverter
from pyrit.prompt_converter.ascii_art_converter import AsciiArtConverter
Expand Down Expand Up @@ -57,6 +58,7 @@

__all__ = [
"AddImageTextConverter",
"AddImageVideoConverter",
"AddTextImageConverter",
"AnsiAttackConverter",
"AsciiArtConverter",
Expand Down
186 changes: 186 additions & 0 deletions pyrit/prompt_converter/add_image_to_video_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import logging
import os
from pathlib import Path
from typing import Optional

import numpy as np

from pyrit.common.path import DB_DATA_PATH
from pyrit.models import PromptDataType, data_serializer_factory
from pyrit.prompt_converter import ConverterResult, PromptConverter

logger = logging.getLogger(__name__)


# Lookup from lowercase video file extension to the FourCC codec string for that container
video_encoding_map = dict(
    mp4="mp4v",
    avi="XVID",
    mov="MJPG",
    mkv="X264",
)


class AddImageVideoConverter(PromptConverter):
    """
    Overlays an image onto a video at a specified position.

    The image is drawn on every frame of the video; placing it only at a specific
    timepoint is currently not supported.

    Args:
        video_path (str): File path of video to add image to
        output_path (str, Optional): File path of output video. Defaults to None, in which case
            a file name is obtained from the video data serializer.
        img_position (tuple, Optional): (x, y) position of the image's top-left corner in the video.
            Defaults to (10, 10).
        img_resize_size (tuple, Optional): (width, height) to resize the image to. Defaults to (500, 500).

    Raises:
        ValueError: If ``video_path`` is empty.
    """

    def __init__(
        self,
        video_path: str,
        output_path: Optional[str] = None,
        img_position: tuple = (10, 10),
        img_resize_size: tuple = (500, 500),
    ):

        if not video_path:
            raise ValueError("Please provide valid video path")

        self._output_path = output_path
        self._img_position = img_position
        self._img_resize_size = img_resize_size
        self._video_path = video_path

    async def _add_image_to_video(self, image_path: str, output_path: str) -> str:
        """
        Adds image to video

        Args:
            image_path (str): The image path to add to video.
            output_path (str): The output video path.

        Returns:
            output_path (str): The output video path.

        Raises:
            ModuleNotFoundError: If opencv is not installed.
            ValueError: If the image path is empty, the video format is unsupported, the video cannot
                be opened, the image cannot be decoded, or the image position lies outside the frame.
        """

        try:
            import cv2  # noqa: F401
        except ModuleNotFoundError as e:
            logger.error("Could not import opencv. You may need to install it via 'pip install pyrit[opencv]'")
            raise e

        if not image_path:
            raise ValueError("Please provide valid image path value")

        input_image_data = data_serializer_factory(
            category="prompt-memory-entries", data_type="image_path", value=image_path
        )
        input_video_data = data_serializer_factory(
            category="prompt-memory-entries", data_type="video_path", value=self._video_path
        )

        # Read the video up front so that a missing video fails before any output is produced
        video_bytes = await input_video_data.read_data()

        azure_storage_flag = input_video_data._is_azure_storage_url(self._video_path)
        video_path = self._video_path

        # Everything released in ``finally`` starts out as None so that an early failure
        # surfaces as the real exception instead of an UnboundLocalError during cleanup.
        local_temp_path = None
        cap = None
        output_video = None

        try:
            if azure_storage_flag:
                # OpenCV needs a local file, so the downloaded bytes are written to a temporary file
                local_temp_path = Path(DB_DATA_PATH, "temp_video.mp4")
                with open(local_temp_path, "wb") as f:
                    f.write(video_bytes)
                video_path = str(local_temp_path)

            # Choose the codec from the extension before touching OpenCV
            file_extension = video_path.split(".")[-1].lower()
            if file_extension not in video_encoding_map:
                raise ValueError(f"Unsupported video format: {file_extension}")

            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                raise ValueError(f"Unable to open video: {video_path}")

            # Get video properties
            fps = int(cap.get(cv2.CAP_PROP_FPS))
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

            # Load the overlay image before creating the writer so that a bad image
            # does not leave an empty output file behind
            input_image_bytes = await input_image_data.read_data()
            image_np_arr = np.frombuffer(input_image_bytes, np.uint8)
            overlay = cv2.imdecode(image_np_arr, cv2.IMREAD_UNCHANGED)
            if overlay is None:
                raise ValueError(f"Unable to decode image: {image_path}")
            if overlay.ndim == 2:
                # Grayscale images decode without a channel axis; expand them to three channels
                overlay = cv2.cvtColor(overlay, cv2.COLOR_GRAY2BGR)
            overlay = cv2.resize(overlay, self._img_resize_size)

            x, y = self._img_position  # Position where the overlay will be placed
            if not (0 <= x < width and 0 <= y < height):
                raise ValueError(
                    f"Image position {self._img_position} is outside of the {width}x{height} video frame"
                )

            # Ensure overlay fits within the frame boundaries (done once, it is the same for all frames)
            image_height, image_width = overlay.shape[:2]
            if x + image_width > width or y + image_height > height:
                logger.info("Overlay image is too large for the video frame. Resizing to fit.")
                overlay = cv2.resize(overlay, (width - x, height - y))
                image_height, image_width = overlay.shape[:2]

            # Precompute the blending terms for images with an alpha channel
            has_alpha = overlay.shape[2] == 4
            if has_alpha:
                alpha_overlay = (overlay[:, :, 3] / 255.0)[:, :, np.newaxis]
                weighted_overlay = alpha_overlay * overlay[:, :, :3]
                inverse_alpha = 1 - alpha_overlay

            video_char_code = cv2.VideoWriter_fourcc(*video_encoding_map[file_extension])  # type: ignore
            output_video = cv2.VideoWriter(str(output_path), video_char_code, fps, (width, height))

            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                # ``region`` is a view, so writing to it modifies the frame in place
                region = frame[y : y + image_height, x : x + image_width]
                if has_alpha:
                    region[:] = weighted_overlay + inverse_alpha * region
                else:
                    region[:] = overlay

                # Write the modified frame to the output video
                output_video.write(frame)

        finally:
            # Release only what was actually acquired. No HighGUI windows are created here,
            # so there is nothing to destroy.
            if cap is not None:
                cap.release()
            if output_video is not None:
                output_video.release()
            if local_temp_path is not None and local_temp_path.exists():
                os.remove(local_temp_path)

        logger.info(f"Video saved as {output_path}")

        return output_path

    async def convert_async(self, *, prompt: str, input_type: PromptDataType = "image_path") -> ConverterResult:
        """
        Converter that adds an image to a video

        Args:
            prompt (str): The image file name to be added to the video.
            input_type (PromptDataType): type of data

        Returns:
            ConverterResult: The filename of the converted video as a ConverterResult Object

        Raises:
            ValueError: If ``input_type`` is not supported by this converter.
        """
        if not self.input_supported(input_type):
            raise ValueError("Input type not supported")

        output_video_serializer = data_serializer_factory(category="prompt-memory-entries", data_type="video_path")

        if not self._output_path:
            output_video_serializer.value = await output_video_serializer.get_data_filename()
        else:
            output_video_serializer.value = self._output_path

        # Add the image to the video
        updated_video = await self._add_image_to_video(image_path=prompt, output_path=output_video_serializer.value)
        return ConverterResult(output_text=str(updated_video), output_type="video_path")

    def input_supported(self, input_type: PromptDataType) -> bool:
        """Return True only for ``image_path`` input."""
        return input_type == "image_path"

    def output_supported(self, output_type: PromptDataType) -> bool:
        """Return True only for ``video_path`` output."""
        return output_type == "video_path"
Loading