From e1946c60574edd35dde6d5ff32748d0beabf6aea Mon Sep 17 00:00:00 2001
From: ShadoWxShinigamI <116374738+ShadoWxShinigamI@users.noreply.github.com>
Date: Wed, 25 Oct 2023 17:26:30 +0530
Subject: [PATCH 1/6] Support For Directory Batch With a Single Prompt

Edit the Python file to set the path of the image directory that needs to be captioned (line 22).
Edit the prompt that gets used for captioning (line 46).
---
 qwen-batch-single-pass.py | 68 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 qwen-batch-single-pass.py
diff --git a/qwen-batch-single-pass.py b/qwen-batch-single-pass.py
new file mode 100644
index 0000000..3145097
--- /dev/null
+++ b/qwen-batch-single-pass.py
@@ -0,0 +1,68 @@
+import os
+import re
+import shutil
+import torch
+import time
+from tqdm import tqdm
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers.generation import GenerationConfig
+
+# Function to check for unwanted elements in the caption
+def has_unwanted_elements(caption):
+    patterns = [r'<ref>.*?</ref>', r'<box>.*?</box>', r'\[\d+\]', r'\(\[\d+\]\)']
+    return any(re.search(pattern, caption) for pattern in patterns)
+
+# Function to clean up the caption
+def clean_caption(caption):
+    caption = re.sub(r'<ref>(.*?)</ref>', r'\1', caption)
+    caption = re.sub(r'<box>.*?</box>', '', caption)
+    return caption.strip()
+
+# Directory containing the images
+image_directory = '/path/to/img_dir/here'
+
+# Supported image types
+image_types = ['.png', '.jpg', '.jpeg', '.bmp', '.gif']
+
+# Initialize the model and tokenizer
+tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-VL", trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-VL", device_map="cuda", trust_remote_code=True).eval()
+
+# First pass with initial seed
+torch.manual_seed(1234)
+files = [f for f in os.listdir(image_directory) if os.path.splitext(f)[1].lower() in image_types]
+
+# Initialize tqdm with custom settings
+pbar = tqdm(total=len(files), desc="Captioning", dynamic_ncols=True, position=0, leave=True)
+start_time = time.time()
+
+print("Captioning phase:")
+for i in range(len(files)):
+    filename = files[i]
+    image_path = os.path.join(image_directory, filename)
+    
+    query = tokenizer.from_list_format([
+        {'image': image_path},
+        {'text': 'describe this image in detail, as if you are an art critic.'},
+    ])
+    
+    response, _ = model.chat(tokenizer, query=query, history=None)
+    
+    # If the caption has unwanted elements, clean it up
+    if has_unwanted_elements(response):
+        response = clean_caption(response)
+    
+    # Save the cleaned caption to a text file in the main directory
+    txt_filename = os.path.splitext(filename)[0] + '.txt'
+    txt_path = os.path.join(image_directory, txt_filename)
+    with open(txt_path, 'w', encoding='utf-8') as f:
+        f.write(response)
+
+    elapsed_time = time.time() - start_time
+    images_per_sec = (i + 1) / elapsed_time
+    estimated_time_remaining = (len(files) - i - 1) / images_per_sec
+
+    pbar.set_postfix({"Time Elapsed": f"{elapsed_time:.2f}s", "ETA": f"{estimated_time_remaining:.2f}s", "Speed": f"{images_per_sec:.2f} img/s"})
+    pbar.update(1)
+
+pbar.close()
\ No newline at end of file

From 2db5867b0674cb41a68ecf6b6f30d3bed0d6460a Mon Sep 17 00:00:00 2001
From: ShadoWxShinigamI <116374738+ShadoWxShinigamI@users.noreply.github.com>
Date: Wed, 25 Oct 2023 19:08:50 +0530
Subject: [PATCH 2/6] updated removal patterns

---
 qwen-batch-single-pass.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/qwen-batch-single-pass.py b/qwen-batch-single-pass.py
index 3145097..1f5f50a 100644
--- a/qwen-batch-single-pass.py
+++ b/qwen-batch-single-pass.py
@@ -9,7 +9,7 @@
 
 # Function to check for unwanted elements in the caption
 def has_unwanted_elements(caption):
-    patterns = [r'<ref>.*?</ref>', r'<box>.*?</box>', r'\[\d+\]', r'\(\[\d+\]\)']
+    patterns = [r'<ref>.*?</ref>', r'<box>.*?</box>']
     return any(re.search(pattern, caption) for pattern in patterns)
 
 # Function to clean up the caption
@@ -65,4 +65,4 @@ def clean_caption(caption):
     pbar.set_postfix({"Time Elapsed": f"{elapsed_time:.2f}s", "ETA": f"{estimated_time_remaining:.2f}s", "Speed": f"{images_per_sec:.2f} img/s"})
     pbar.update(1)
 
-pbar.close()
\ No newline at end of file
+pbar.close()

From 62da942e7107fd1df3672418547c50457c83ecf8 Mon Sep 17 00:00:00 2001
From: ShadoWxShinigamI <116374738+ShadoWxShinigamI@users.noreply.github.com>
Date: Thu, 26 Oct 2023 10:04:17 +0530
Subject: [PATCH 3/6] Placeholder prompt

---
 qwen-batch-single-pass.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qwen-batch-single-pass.py b/qwen-batch-single-pass.py
index 1f5f50a..36d7b77 100644
--- a/qwen-batch-single-pass.py
+++ b/qwen-batch-single-pass.py
@@ -43,7 +43,7 @@ def clean_caption(caption):
     
     query = tokenizer.from_list_format([
         {'image': image_path},
-        {'text': 'describe this image in detail, as if you are an art critic.'},
+        {'text': 'describe this image'},
     ])
     
     response, _ = model.chat(tokenizer, query=query, history=None)

From 5467b464653048892a02e39d3e649d8e31273fee Mon Sep 17 00:00:00 2001
From: ShadoWxShinigamI <116374738+ShadoWxShinigamI@users.noreply.github.com>
Date: Thu, 26 Oct 2023 22:16:40 +0530
Subject: [PATCH 4/6] Added Arguments and Enabled Flash Attention by Default

Added arguments:
--imgdir=path/to/img/directory
--exist=skip/add/replace (how to handle existing caption .txt files; default: replace)

Enabled use_flash_attn by default and switched the model to Qwen/Qwen-VL-Chat-Int4.
---
 qwen-batch-single-pass.py | 44 ++++++++++++++++++++++-----------------
 1 file changed, 25 insertions(+), 19 deletions(-)

diff --git a/qwen-batch-single-pass.py b/qwen-batch-single-pass.py
index 36d7b77..ccee8b4 100644
--- a/qwen-batch-single-pass.py
+++ b/qwen-batch-single-pass.py
@@ -1,38 +1,35 @@
 import os
 import re
-import shutil
 import torch
 import time
 from tqdm import tqdm
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from transformers.generation import GenerationConfig
+import argparse
 
-# Function to check for unwanted elements in the caption
 def has_unwanted_elements(caption):
     patterns = [r'<ref>.*?</ref>', r'<box>.*?</box>']
     return any(re.search(pattern, caption) for pattern in patterns)
 
-# Function to clean up the caption
 def clean_caption(caption):
     caption = re.sub(r'<ref>(.*?)</ref>', r'\1', caption)
     caption = re.sub(r'<box>.*?</box>', '', caption)
     return caption.strip()
 
-# Directory containing the images
-image_directory = '/path/to/img_dir/here'
+# Argument parsing
+parser = argparse.ArgumentParser(description='Image Captioning Script')
+parser.add_argument('--imgdir', type=str, default='img/dir/here', help='Path to image directory')
+parser.add_argument('--exist', type=str, choices=['skip', 'add', 'replace'], default='replace', help='Handling of existing txt files')
+args = parser.parse_args()
 
-# Supported image types
+image_directory = args.imgdir
 image_types = ['.png', '.jpg', '.jpeg', '.bmp', '.gif']
 
-# Initialize the model and tokenizer
-tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-VL", trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-VL", device_map="cuda", trust_remote_code=True).eval()
+tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-VL-Chat-Int4", trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-VL-Chat-Int4", device_map="cuda", trust_remote_code=True, use_flash_attn=True).eval()
 
-# First pass with initial seed
-torch.manual_seed(1234)
 files = [f for f in os.listdir(image_directory) if os.path.splitext(f)[1].lower() in image_types]
 
-# Initialize tqdm with custom settings
 pbar = tqdm(total=len(files), desc="Captioning", dynamic_ncols=True, position=0, leave=True)
 start_time = time.time()
 
@@ -40,23 +37,32 @@ def clean_caption(caption):
 for i in range(len(files)):
     filename = files[i]
     image_path = os.path.join(image_directory, filename)
+
+    # Check for existing txt file and handle based on the argument
+    txt_filename = os.path.splitext(filename)[0] + '.txt'
+    txt_path = os.path.join(image_directory, txt_filename)
     
+    if args.exist == 'skip' and os.path.exists(txt_path):
+        pbar.update(1)
+        continue
+    elif args.exist == 'add' and os.path.exists(txt_path):
+        with open(txt_path, 'r', encoding='utf-8') as f:
+            existing_content = f.read()
+
     query = tokenizer.from_list_format([
         {'image': image_path},
-        {'text': 'describe this image'},
+        {'text': 'describe this image in detail, as if you are an art critic in less than 35 words'},
     ])
-    
     response, _ = model.chat(tokenizer, query=query, history=None)
     
-    # If the caption has unwanted elements, clean it up
     if has_unwanted_elements(response):
         response = clean_caption(response)
     
-    # Save the cleaned caption to a text file in the main directory
-    txt_filename = os.path.splitext(filename)[0] + '.txt'
-    txt_path = os.path.join(image_directory, txt_filename)
     with open(txt_path, 'w', encoding='utf-8') as f:
-        f.write(response)
+        if args.exist == 'add' and os.path.exists(txt_path):
+            f.write(existing_content + "\n" + response)
+        else:
+            f.write(response)
 
     elapsed_time = time.time() - start_time
     images_per_sec = (i + 1) / elapsed_time

From 908cede6eeb469d9ba4c4153b0875b8dda926934 Mon Sep 17 00:00:00 2001
From: ShadoWxShinigamI <116374738+ShadoWxShinigamI@users.noreply.github.com>
Date: Thu, 26 Oct 2023 22:28:44 +0530
Subject: [PATCH 5/6] Final Update

Added argument for prompt
--prompt="your prompt here"
---
 qwen-batch-single-pass.py | 42 +++++++++++++++++++++++----------------
 1 file changed, 25 insertions(+), 17 deletions(-)

diff --git a/qwen-batch-single-pass.py b/qwen-batch-single-pass.py
index ccee8b4..fdd9fd3 100644
--- a/qwen-batch-single-pass.py
+++ b/qwen-batch-single-pass.py
@@ -2,45 +2,50 @@
 import re
 import torch
 import time
+import argparse
 from tqdm import tqdm
 from transformers import AutoModelForCausalLM, AutoTokenizer
-from transformers.generation import GenerationConfig
-import argparse
 
+# Argument Parsing
+parser = argparse.ArgumentParser(description='Image Captioning Script')
+parser.add_argument('--imgdir', type=str, default='path/to/img/dir', help='Directory containing images')
+parser.add_argument('--exist', type=str, default='replace', choices=['skip', 'add', 'replace'], help='Handling of existing captions')
+parser.add_argument('--prompt', type=str, default='describe this image in detail, in less than 35 words', help='Prompt to use for image captioning')
+args = parser.parse_args()
+
+# Function to check for unwanted elements in the caption
 def has_unwanted_elements(caption):
     patterns = [r'<ref>.*?</ref>', r'<box>.*?</box>']
     return any(re.search(pattern, caption) for pattern in patterns)
 
+# Function to clean up the caption
 def clean_caption(caption):
     caption = re.sub(r'<ref>(.*?)</ref>', r'\1', caption)
     caption = re.sub(r'<box>.*?</box>', '', caption)
     return caption.strip()
 
-# Argument parsing
-parser = argparse.ArgumentParser(description='Image Captioning Script')
-parser.add_argument('--imgdir', type=str, default='img/dir/here', help='Path to image directory')
-parser.add_argument('--exist', type=str, choices=['skip', 'add', 'replace'], default='replace', help='Handling of existing txt files')
-args = parser.parse_args()
-
-image_directory = args.imgdir
+# Supported image types
 image_types = ['.png', '.jpg', '.jpeg', '.bmp', '.gif']
 
+# Initialize the model and tokenizer
 tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-VL-Chat-Int4", trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-VL-Chat-Int4", device_map="cuda", trust_remote_code=True, use_flash_attn=True).eval()
 
-files = [f for f in os.listdir(image_directory) if os.path.splitext(f)[1].lower() in image_types]
+# Get the list of image files in the specified directory
+files = [f for f in os.listdir(args.imgdir) if os.path.splitext(f)[1].lower() in image_types]
 
+# Initialize the progress bar
 pbar = tqdm(total=len(files), desc="Captioning", dynamic_ncols=True, position=0, leave=True)
 start_time = time.time()
 
 print("Captioning phase:")
 for i in range(len(files)):
     filename = files[i]
-    image_path = os.path.join(image_directory, filename)
+    image_path = os.path.join(args.imgdir, filename)
 
-    # Check for existing txt file and handle based on the argument
+    # Handle based on the argument 'exist'
     txt_filename = os.path.splitext(filename)[0] + '.txt'
-    txt_path = os.path.join(image_directory, txt_filename)
+    txt_path = os.path.join(args.imgdir, txt_filename)
     
     if args.exist == 'skip' and os.path.exists(txt_path):
         pbar.update(1)
@@ -49,26 +54,29 @@ def clean_caption(caption):
         with open(txt_path, 'r', encoding='utf-8') as f:
             existing_content = f.read()
 
+    # Generate the caption using the model
     query = tokenizer.from_list_format([
         {'image': image_path},
-        {'text': 'describe this image in detail, as if you are an art critic in less than 35 words'},
+        {'text': args.prompt},
     ])
     response, _ = model.chat(tokenizer, query=query, history=None)
-    
+
+    # Clean up the caption if necessary
     if has_unwanted_elements(response):
         response = clean_caption(response)
     
+    # Write the caption to the corresponding .txt file
     with open(txt_path, 'w', encoding='utf-8') as f:
         if args.exist == 'add' and os.path.exists(txt_path):
             f.write(existing_content + "\n" + response)
         else:
             f.write(response)
 
+    # Update progress bar with some additional information about the process
     elapsed_time = time.time() - start_time
     images_per_sec = (i + 1) / elapsed_time
     estimated_time_remaining = (len(files) - i - 1) / images_per_sec
-
     pbar.set_postfix({"Time Elapsed": f"{elapsed_time:.2f}s", "ETA": f"{estimated_time_remaining:.2f}s", "Speed": f"{images_per_sec:.2f} img/s"})
     pbar.update(1)
 
-pbar.close()
+pbar.close()
\ No newline at end of file

From 7bc680f5d5b3ec107ee410654a830379608bb814 Mon Sep 17 00:00:00 2001
From: ShadoWxShinigamI <shadowshingami123@gmail.com>
Date: Mon, 22 Jan 2024 14:24:56 +0530
Subject: [PATCH 6/6] --add arg fix

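Changed it so that using --exist=add no longer starts a new line; the new caption is appended to the existing caption on the same line, separated by a space.
Also adds qwen-batch-single-pass-v2.py, which has a --sub option to search subdirectories for images.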
---
 qwen-batch-single-pass-v2.py | 90 ++++++++++++++++++++++++++++++++++++
 qwen-batch-single-pass.py    |  2 +-
 2 files changed, 91 insertions(+), 1 deletion(-)
 create mode 100644 qwen-batch-single-pass-v2.py

diff --git a/qwen-batch-single-pass-v2.py b/qwen-batch-single-pass-v2.py
new file mode 100644
index 0000000..2b26be9
--- /dev/null
+++ b/qwen-batch-single-pass-v2.py
@@ -0,0 +1,90 @@
+import os
+import re
+import torch
+import time
+import argparse
+from tqdm import tqdm
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+# Argument Parsing
+parser = argparse.ArgumentParser(description='Image Captioning Script')
+parser.add_argument('--imgdir', type=str, default='path/to/img/dir', help='Directory containing images')
+parser.add_argument('--exist', type=str, default='replace', choices=['skip', 'add', 'replace'], help='Handling of existing captions')
+parser.add_argument('--prompt', type=str, default='describe this image in detail, in less than 35 words', help='Prompt to use for image captioning')
+parser.add_argument('--sub', type=lambda x: (str(x).lower() == 'true'), default=False, help='Search for images in subdirectories')
+args = parser.parse_args()
+
+# Function to check for unwanted elements in the caption
+def has_unwanted_elements(caption):
+    patterns = [r'<ref>.*?</ref>', r'<box>.*?</box>']
+    return any(re.search(pattern, caption) for pattern in patterns)
+
+# Function to clean up the caption
+def clean_caption(caption):
+    caption = re.sub(r'<ref>(.*?)</ref>', r'\1', caption)
+    caption = re.sub(r'<box>.*?</box>', '', caption)
+    return caption.strip()
+
+# Supported image types
+image_types = ['.png', '.jpg', '.jpeg', '.bmp', '.gif']
+
+# Initialize the model and tokenizer
+tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-VL-Chat-Int4", trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-VL-Chat-Int4", device_map="cuda", trust_remote_code=True, use_flash_attn=True).eval()
+
+# Function to get files recursively from a directory
+def get_files_from_directory(directory, image_types, search_subdirectories=False):
+    if search_subdirectories:
+        return [os.path.join(dp, f) for dp, dn, filenames in os.walk(directory) for f in filenames if os.path.splitext(f)[1].lower() in image_types]
+    else:
+        return [f for f in os.listdir(directory) if os.path.splitext(f)[1].lower() in image_types]
+
+# Get the list of image files in the specified directory, possibly including subdirectories
+files = get_files_from_directory(args.imgdir, image_types, args.sub)
+
+# Initialize the progress bar
+pbar = tqdm(total=len(files), desc="Captioning", dynamic_ncols=True, position=0, leave=True)
+start_time = time.time()
+
+print("Captioning phase:")
+for i in range(len(files)):
+    filename = files[i]
+    image_path = os.path.join(args.imgdir, filename)
+
+    # Handle based on the argument 'exist'
+    txt_filename = os.path.splitext(filename)[0] + '.txt'
+    txt_path = os.path.join(args.imgdir, txt_filename)
+    
+    if args.exist == 'skip' and os.path.exists(txt_path):
+        pbar.update(1)
+        continue
+    elif args.exist == 'add' and os.path.exists(txt_path):
+        with open(txt_path, 'r', encoding='utf-8') as f:
+            existing_content = f.read()
+
+    # Generate the caption using the model
+    query = tokenizer.from_list_format([
+        {'image': image_path},
+        {'text': args.prompt},
+    ])
+    response, _ = model.chat(tokenizer, query=query, history=None)
+
+    # Clean up the caption if necessary
+    if has_unwanted_elements(response):
+        response = clean_caption(response)
+    
+    # Write the caption to the corresponding .txt file
+    with open(txt_path, 'w', encoding='utf-8') as f:
+        if args.exist == 'add' and os.path.exists(txt_path):
+            f.write(existing_content + " " + response)
+        else:
+            f.write(response)
+
+    # Update progress bar with some additional information about the process
+    elapsed_time = time.time() - start_time
+    images_per_sec = (i + 1) / elapsed_time
+    estimated_time_remaining = (len(files) - i - 1) / images_per_sec
+    pbar.set_postfix({"Time Elapsed": f"{elapsed_time:.2f}s", "ETA": f"{estimated_time_remaining:.2f}s", "Speed": f"{images_per_sec:.2f} img/s"})
+    pbar.update(1)
+
+pbar.close()
\ No newline at end of file
diff --git a/qwen-batch-single-pass.py b/qwen-batch-single-pass.py
index fdd9fd3..69f5890 100644
--- a/qwen-batch-single-pass.py
+++ b/qwen-batch-single-pass.py
@@ -68,7 +68,7 @@ def clean_caption(caption):
     # Write the caption to the corresponding .txt file
     with open(txt_path, 'w', encoding='utf-8') as f:
         if args.exist == 'add' and os.path.exists(txt_path):
-            f.write(existing_content + "\n" + response)
+            f.write(existing_content + " " + response)
         else:
             f.write(response)