From e1946c60574edd35dde6d5ff32748d0beabf6aea Mon Sep 17 00:00:00 2001
From: ShadoWxShinigamI <116374738+ShadoWxShinigamI@users.noreply.github.com>
Date: Wed, 25 Oct 2023 17:26:30 +0530
Subject: [PATCH 1/6] Support For Directory Batch With a Single Prompt
Edit the Python file to set the location of the image directory to be captioned (line 22).
Edit the prompt that is used for captioning (line 46).
---
qwen-batch-single-pass.py | 68 +++++++++++++++++++++++++++++++++++++++
1 file changed, 68 insertions(+)
create mode 100644 qwen-batch-single-pass.py
diff --git a/qwen-batch-single-pass.py b/qwen-batch-single-pass.py
new file mode 100644
index 0000000..3145097
--- /dev/null
+++ b/qwen-batch-single-pass.py
@@ -0,0 +1,68 @@
+import os
+import re
+import shutil
+import torch
+import time
+from tqdm import tqdm
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers.generation import GenerationConfig
+
+# Function to check for unwanted elements in the caption
+def has_unwanted_elements(caption):
+ patterns = [r'[.*?]', r'.*?', r'\[\d+\]', r'\(\[\d+\]\)']
+ return any(re.search(pattern, caption) for pattern in patterns)
+
+# Function to clean up the caption
+def clean_caption(caption):
+ caption = re.sub(r'[(.*?)]', r'\1', caption)
+ caption = re.sub(r'.*?', '', caption)
+ return caption.strip()
+
+# Directory containing the images
+image_directory = '/path/to/img_dir/here'
+
+# Supported image types
+image_types = ['.png', '.jpg', '.jpeg', '.bmp', '.gif']
+
+# Initialize the model and tokenizer
+tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-VL", trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-VL", device_map="cuda", trust_remote_code=True).eval()
+
+# First pass with initial seed
+torch.manual_seed(1234)
+files = [f for f in os.listdir(image_directory) if os.path.splitext(f)[1].lower() in image_types]
+
+# Initialize tqdm with custom settings
+pbar = tqdm(total=len(files), desc="Captioning", dynamic_ncols=True, position=0, leave=True)
+start_time = time.time()
+
+print("Captioning phase:")
+for i in range(len(files)):
+ filename = files[i]
+ image_path = os.path.join(image_directory, filename)
+
+ query = tokenizer.from_list_format([
+ {'image': image_path},
+ {'text': 'describe this image in detail, as if you are an art critic.'},
+ ])
+
+ response, _ = model.chat(tokenizer, query=query, history=None)
+
+ # If the caption has unwanted elements, clean it up
+ if has_unwanted_elements(response):
+ response = clean_caption(response)
+
+ # Save the cleaned caption to a text file in the main directory
+ txt_filename = os.path.splitext(filename)[0] + '.txt'
+ txt_path = os.path.join(image_directory, txt_filename)
+ with open(txt_path, 'w', encoding='utf-8') as f:
+ f.write(response)
+
+ elapsed_time = time.time() - start_time
+ images_per_sec = (i + 1) / elapsed_time
+ estimated_time_remaining = (len(files) - i - 1) / images_per_sec
+
+ pbar.set_postfix({"Time Elapsed": f"{elapsed_time:.2f}s", "ETA": f"{estimated_time_remaining:.2f}s", "Speed": f"{images_per_sec:.2f} img/s"})
+ pbar.update(1)
+
+pbar.close()
\ No newline at end of file
From 2db5867b0674cb41a68ecf6b6f30d3bed0d6460a Mon Sep 17 00:00:00 2001
From: ShadoWxShinigamI <116374738+ShadoWxShinigamI@users.noreply.github.com>
Date: Wed, 25 Oct 2023 19:08:50 +0530
Subject: [PATCH 2/6] updated removal patterns
---
qwen-batch-single-pass.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/qwen-batch-single-pass.py b/qwen-batch-single-pass.py
index 3145097..1f5f50a 100644
--- a/qwen-batch-single-pass.py
+++ b/qwen-batch-single-pass.py
@@ -9,7 +9,7 @@
# Function to check for unwanted elements in the caption
def has_unwanted_elements(caption):
- patterns = [r'[.*?]', r'.*?', r'\[\d+\]', r'\(\[\d+\]\)']
+ patterns = [r'[.*?]', r'.*?']
return any(re.search(pattern, caption) for pattern in patterns)
# Function to clean up the caption
@@ -65,4 +65,4 @@ def clean_caption(caption):
pbar.set_postfix({"Time Elapsed": f"{elapsed_time:.2f}s", "ETA": f"{estimated_time_remaining:.2f}s", "Speed": f"{images_per_sec:.2f} img/s"})
pbar.update(1)
-pbar.close()
\ No newline at end of file
+pbar.close()
From 62da942e7107fd1df3672418547c50457c83ecf8 Mon Sep 17 00:00:00 2001
From: ShadoWxShinigamI <116374738+ShadoWxShinigamI@users.noreply.github.com>
Date: Thu, 26 Oct 2023 10:04:17 +0530
Subject: [PATCH 3/6] Placeholder prompt
---
qwen-batch-single-pass.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/qwen-batch-single-pass.py b/qwen-batch-single-pass.py
index 1f5f50a..36d7b77 100644
--- a/qwen-batch-single-pass.py
+++ b/qwen-batch-single-pass.py
@@ -43,7 +43,7 @@ def clean_caption(caption):
query = tokenizer.from_list_format([
{'image': image_path},
- {'text': 'describe this image in detail, as if you are an art critic.'},
+ {'text': 'describe this image'},
])
response, _ = model.chat(tokenizer, query=query, history=None)
From 5467b464653048892a02e39d3e649d8e31273fee Mon Sep 17 00:00:00 2001
From: ShadoWxShinigamI <116374738+ShadoWxShinigamI@users.noreply.github.com>
Date: Thu, 26 Oct 2023 22:16:40 +0530
Subject: [PATCH 4/6] Added Arguments and Enabled Flash Attention by Default
Added arguments:
--imgdir=path/to/img/directory
--exist=skip/add/replace (how to handle existing captions; default: replace)
Enabled use_flash_attn by default and switched the checkpoint from Qwen/Qwen-VL to Qwen/Qwen-VL-Chat-Int4.
---
qwen-batch-single-pass.py | 44 ++++++++++++++++++++++-----------------
1 file changed, 25 insertions(+), 19 deletions(-)
diff --git a/qwen-batch-single-pass.py b/qwen-batch-single-pass.py
index 36d7b77..ccee8b4 100644
--- a/qwen-batch-single-pass.py
+++ b/qwen-batch-single-pass.py
@@ -1,38 +1,35 @@
import os
import re
-import shutil
import torch
import time
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig
+import argparse
-# Function to check for unwanted elements in the caption
def has_unwanted_elements(caption):
patterns = [r'[.*?]', r'.*?']
return any(re.search(pattern, caption) for pattern in patterns)
-# Function to clean up the caption
def clean_caption(caption):
caption = re.sub(r'[(.*?)]', r'\1', caption)
caption = re.sub(r'.*?', '', caption)
return caption.strip()
-# Directory containing the images
-image_directory = '/path/to/img_dir/here'
+# Argument parsing
+parser = argparse.ArgumentParser(description='Image Captioning Script')
+parser.add_argument('--imgdir', type=str, default='img/dir/here', help='Path to image directory')
+parser.add_argument('--exist', type=str, choices=['skip', 'add', 'replace'], default='replace', help='Handling of existing txt files')
+args = parser.parse_args()
-# Supported image types
+image_directory = args.imgdir
image_types = ['.png', '.jpg', '.jpeg', '.bmp', '.gif']
-# Initialize the model and tokenizer
-tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-VL", trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-VL", device_map="cuda", trust_remote_code=True).eval()
+tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-VL-Chat-Int4", trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-VL-Chat-Int4", device_map="cuda", trust_remote_code=True, use_flash_attn=True).eval()
-# First pass with initial seed
-torch.manual_seed(1234)
files = [f for f in os.listdir(image_directory) if os.path.splitext(f)[1].lower() in image_types]
-# Initialize tqdm with custom settings
pbar = tqdm(total=len(files), desc="Captioning", dynamic_ncols=True, position=0, leave=True)
start_time = time.time()
@@ -40,23 +37,32 @@ def clean_caption(caption):
for i in range(len(files)):
filename = files[i]
image_path = os.path.join(image_directory, filename)
+
+ # Check for existing txt file and handle based on the argument
+ txt_filename = os.path.splitext(filename)[0] + '.txt'
+ txt_path = os.path.join(image_directory, txt_filename)
+ if args.exist == 'skip' and os.path.exists(txt_path):
+ pbar.update(1)
+ continue
+ elif args.exist == 'add' and os.path.exists(txt_path):
+ with open(txt_path, 'r', encoding='utf-8') as f:
+ existing_content = f.read()
+
query = tokenizer.from_list_format([
{'image': image_path},
- {'text': 'describe this image'},
+ {'text': 'describe this image in detail, as if you are an art critic in less than 35 words'},
])
-
response, _ = model.chat(tokenizer, query=query, history=None)
- # If the caption has unwanted elements, clean it up
if has_unwanted_elements(response):
response = clean_caption(response)
- # Save the cleaned caption to a text file in the main directory
- txt_filename = os.path.splitext(filename)[0] + '.txt'
- txt_path = os.path.join(image_directory, txt_filename)
with open(txt_path, 'w', encoding='utf-8') as f:
- f.write(response)
+ if args.exist == 'add' and os.path.exists(txt_path):
+ f.write(existing_content + "\n" + response)
+ else:
+ f.write(response)
elapsed_time = time.time() - start_time
images_per_sec = (i + 1) / elapsed_time
From 908cede6eeb469d9ba4c4153b0875b8dda926934 Mon Sep 17 00:00:00 2001
From: ShadoWxShinigamI <116374738+ShadoWxShinigamI@users.noreply.github.com>
Date: Thu, 26 Oct 2023 22:28:44 +0530
Subject: [PATCH 5/6] Final Update
Added argument for prompt
--prompt="your prompt here"
---
qwen-batch-single-pass.py | 42 +++++++++++++++++++++++----------------
1 file changed, 25 insertions(+), 17 deletions(-)
diff --git a/qwen-batch-single-pass.py b/qwen-batch-single-pass.py
index ccee8b4..fdd9fd3 100644
--- a/qwen-batch-single-pass.py
+++ b/qwen-batch-single-pass.py
@@ -2,45 +2,50 @@
import re
import torch
import time
+import argparse
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer
-from transformers.generation import GenerationConfig
-import argparse
+# Argument Parsing
+parser = argparse.ArgumentParser(description='Image Captioning Script')
+parser.add_argument('--imgdir', type=str, default='path/to/img/dir', help='Directory containing images')
+parser.add_argument('--exist', type=str, default='replace', choices=['skip', 'add', 'replace'], help='Handling of existing captions')
+parser.add_argument('--prompt', type=str, default='describe this image in detail, in less than 35 words', help='Prompt to use for image captioning')
+args = parser.parse_args()
+
+# Function to check for unwanted elements in the caption
def has_unwanted_elements(caption):
patterns = [r'[.*?]', r'.*?']
return any(re.search(pattern, caption) for pattern in patterns)
+# Function to clean up the caption
def clean_caption(caption):
caption = re.sub(r'[(.*?)]', r'\1', caption)
caption = re.sub(r'.*?', '', caption)
return caption.strip()
-# Argument parsing
-parser = argparse.ArgumentParser(description='Image Captioning Script')
-parser.add_argument('--imgdir', type=str, default='img/dir/here', help='Path to image directory')
-parser.add_argument('--exist', type=str, choices=['skip', 'add', 'replace'], default='replace', help='Handling of existing txt files')
-args = parser.parse_args()
-
-image_directory = args.imgdir
+# Supported image types
image_types = ['.png', '.jpg', '.jpeg', '.bmp', '.gif']
+# Initialize the model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-VL-Chat-Int4", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-VL-Chat-Int4", device_map="cuda", trust_remote_code=True, use_flash_attn=True).eval()
-files = [f for f in os.listdir(image_directory) if os.path.splitext(f)[1].lower() in image_types]
+# Get the list of image files in the specified directory
+files = [f for f in os.listdir(args.imgdir) if os.path.splitext(f)[1].lower() in image_types]
+# Initialize the progress bar
pbar = tqdm(total=len(files), desc="Captioning", dynamic_ncols=True, position=0, leave=True)
start_time = time.time()
print("Captioning phase:")
for i in range(len(files)):
filename = files[i]
- image_path = os.path.join(image_directory, filename)
+ image_path = os.path.join(args.imgdir, filename)
- # Check for existing txt file and handle based on the argument
+ # Handle based on the argument 'exist'
txt_filename = os.path.splitext(filename)[0] + '.txt'
- txt_path = os.path.join(image_directory, txt_filename)
+ txt_path = os.path.join(args.imgdir, txt_filename)
if args.exist == 'skip' and os.path.exists(txt_path):
pbar.update(1)
@@ -49,26 +54,29 @@ def clean_caption(caption):
with open(txt_path, 'r', encoding='utf-8') as f:
existing_content = f.read()
+ # Generate the caption using the model
query = tokenizer.from_list_format([
{'image': image_path},
- {'text': 'describe this image in detail, as if you are an art critic in less than 35 words'},
+ {'text': args.prompt},
])
response, _ = model.chat(tokenizer, query=query, history=None)
-
+
+ # Clean up the caption if necessary
if has_unwanted_elements(response):
response = clean_caption(response)
+ # Write the caption to the corresponding .txt file
with open(txt_path, 'w', encoding='utf-8') as f:
if args.exist == 'add' and os.path.exists(txt_path):
f.write(existing_content + "\n" + response)
else:
f.write(response)
+ # Update progress bar with some additional information about the process
elapsed_time = time.time() - start_time
images_per_sec = (i + 1) / elapsed_time
estimated_time_remaining = (len(files) - i - 1) / images_per_sec
-
pbar.set_postfix({"Time Elapsed": f"{elapsed_time:.2f}s", "ETA": f"{estimated_time_remaining:.2f}s", "Speed": f"{images_per_sec:.2f} img/s"})
pbar.update(1)
-pbar.close()
+pbar.close()
\ No newline at end of file
From 7bc680f5d5b3ec107ee410654a830379608bb814 Mon Sep 17 00:00:00 2001
From: ShadoWxShinigamI
Date: Mon, 22 Jan 2024 14:24:56 +0530
Subject: [PATCH 6/6] --add arg fix
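Changed it so that --exist=add no longer starts a new line: the new caption is appended to the existing caption on the same line, separated by a space. Also adds qwen-batch-single-pass-v2.py, which has a --sub option for searching subdirectories.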
---
qwen-batch-single-pass-v2.py | 90 ++++++++++++++++++++++++++++++++++++
qwen-batch-single-pass.py | 2 +-
2 files changed, 91 insertions(+), 1 deletion(-)
create mode 100644 qwen-batch-single-pass-v2.py
diff --git a/qwen-batch-single-pass-v2.py b/qwen-batch-single-pass-v2.py
new file mode 100644
index 0000000..2b26be9
--- /dev/null
+++ b/qwen-batch-single-pass-v2.py
@@ -0,0 +1,90 @@
+import os
+import re
+import torch
+import time
+import argparse
+from tqdm import tqdm
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+# Command-line arguments: image directory, policy for existing captions, prompt text, and subdirectory search
+parser = argparse.ArgumentParser(description='Image Captioning Script')
+parser.add_argument('--imgdir', type=str, default='path/to/img/dir', help='Directory containing images')
+parser.add_argument('--exist', type=str, default='replace', choices=['skip', 'add', 'replace'], help='Handling of existing captions')
+parser.add_argument('--prompt', type=str, default='describe this image in detail, in less than 35 words', help='Prompt to use for image captioning')
+parser.add_argument('--sub', type=lambda x: (str(x).lower() == 'true'), default=False, help='Search for images in subdirectories')  # only the string "true" (any case) enables this; every other value parses as False
+args = parser.parse_args()
+
+# Function to check for grounding markup in the caption. NOTE(review): assumes the model wraps labels in <ref>...</ref> and boxes in <box>...</box> -- confirm
+def has_unwanted_elements(caption):
+    """Return True if `caption` contains a <ref>...</ref> or <box>...</box> span, False otherwise."""
+    return any(re.search(pattern, caption) for pattern in (r'<ref>.*?</ref>', r'<box>.*?</box>'))
+
+# Function to clean up the caption. NOTE(review): assumes <ref>/<box> are the markup tags the model emits -- confirm
+def clean_caption(caption):
+    """Return `caption` with <ref> tags unwrapped (inner text kept), <box>...</box> spans removed, and outer whitespace stripped."""
+    caption = re.sub(r'<ref>(.*?)</ref>', r'\1', caption)  # \1 needs a capture group; a pattern without one raises re.error
+    return re.sub(r'<box>.*?</box>', '', caption).strip()
+
+# Supported image types (file extensions, compared in lower case when listing files)
+image_types = ['.png', '.jpg', '.jpeg', '.bmp', '.gif']
+
+# Initialize the tokenizer and model from the "Qwen/Qwen-VL-Chat-Int4" checkpoint; the model is placed on CUDA and put in eval mode
+tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-VL-Chat-Int4", trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-VL-Chat-Int4", device_map="cuda", trust_remote_code=True, use_flash_attn=True).eval()
+
+# Function to list image files under a directory, optionally recursing into subdirectories
+def get_files_from_directory(directory, image_types, search_subdirectories=False):
+    """Return image paths relative to `directory` in both modes, so callers can always join them onto `directory`."""
+    if not search_subdirectories:
+        return [f for f in os.listdir(directory) if os.path.splitext(f)[1].lower() in image_types]
+    return [os.path.relpath(os.path.join(dp, f), directory) for dp, _, filenames in os.walk(directory) for f in filenames if os.path.splitext(f)[1].lower() in image_types]
+
+# Get the list of image files in the specified directory, possibly including subdirectories
+files = get_files_from_directory(args.imgdir, image_types, args.sub)
+
+# Initialize the progress bar
+pbar = tqdm(total=len(files), desc="Captioning", dynamic_ncols=True, position=0, leave=True)
+start_time = time.time()
+
+print("Captioning phase:")
+for i, filename in enumerate(files):
+    image_path = os.path.join(args.imgdir, filename)
+    txt_path = os.path.join(args.imgdir, os.path.splitext(filename)[0] + '.txt')
+
+    # Handle based on the argument 'exist'. The existence check must happen before the
+    # file is opened for writing below, because open(..., 'w') creates the file.
+    had_caption = os.path.exists(txt_path)
+    if args.exist == 'skip' and had_caption:
+        pbar.update(1)
+        continue
+
+    # Reset per image so 'add' never reuses the previous image's text (or an unset name)
+    # when this image has no caption file yet.
+    existing_content = None
+    if args.exist == 'add' and had_caption:
+        with open(txt_path, 'r', encoding='utf-8') as f:
+            existing_content = f.read()
+
+    # Generate the caption using the model
+    query = tokenizer.from_list_format([
+        {'image': image_path},
+        {'text': args.prompt},
+    ])
+    response, _ = model.chat(tokenizer, query=query, history=None)
+
+    # Clean up the caption if necessary
+    if has_unwanted_elements(response):
+        response = clean_caption(response)
+
+    # Write the caption; in 'add' mode it follows the existing caption on the same line
+    with open(txt_path, 'w', encoding='utf-8') as f:
+        f.write(response if existing_content is None else existing_content + " " + response)
+
+    # Update progress bar with some additional information about the process
+    elapsed_time = time.time() - start_time
+    images_per_sec = (i + 1) / elapsed_time
+    estimated_time_remaining = (len(files) - i - 1) / images_per_sec
+    pbar.set_postfix({"Time Elapsed": f"{elapsed_time:.2f}s", "ETA": f"{estimated_time_remaining:.2f}s", "Speed": f"{images_per_sec:.2f} img/s"})
+    pbar.update(1)
+
+pbar.close()
\ No newline at end of file
diff --git a/qwen-batch-single-pass.py b/qwen-batch-single-pass.py
index fdd9fd3..69f5890 100644
--- a/qwen-batch-single-pass.py
+++ b/qwen-batch-single-pass.py
@@ -68,7 +68,7 @@ def clean_caption(caption):
# Write the caption to the corresponding .txt file
with open(txt_path, 'w', encoding='utf-8') as f:
if args.exist == 'add' and os.path.exists(txt_path):
- f.write(existing_content + "\n" + response)
+ f.write(existing_content + " " + response)
else:
f.write(response)