"""Batch-caption a directory of images with Qwen-VL-Chat (Int4).

For every supported image found in ``--imgdir`` a caption is generated from
``--prompt`` and written next to the image as ``<image name without
extension>.txt``.

Command-line arguments:
    --imgdir  Directory containing images.
    --exist   What to do when a caption file already exists:
              ``skip`` (leave it), ``add`` (append the new caption on the
              same line, separated by a space) or ``replace`` (overwrite).
    --prompt  Prompt sent to the model together with each image.
    --sub     ``true`` to also search sub-directories of ``--imgdir``.
"""
import argparse
import os
import re
import time

# Model/tokenizer checkpoint used for captioning.
MODEL_NAME = "Qwen/Qwen-VL-Chat-Int4"

# Supported image types (compared against the lower-cased file extension).
image_types = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')

# NOTE(review): in the collapsed source these patterns read r'.*?' / r'(.*?)',
# which match the empty string everywhere (the check was always true and the
# second substitution erased the whole caption). The tag names are restored
# here to Qwen-VL's grounding markup -- confirm against the upstream script.
_REF_PATTERN = re.compile(r'<ref>(.*?)</ref>')
_BOX_PATTERN = re.compile(r'<box>.*?</box>')


def has_unwanted_elements(caption):
    """Return True if *caption* contains ``<ref>`` or ``<box>`` markup."""
    return any(p.search(caption) for p in (_REF_PATTERN, _BOX_PATTERN))


def clean_caption(caption):
    """Strip grounding markup from *caption*.

    ``<ref>text</ref>`` is replaced by ``text``, ``<box>...</box>`` is removed
    entirely, and surrounding whitespace is trimmed.
    """
    caption = _REF_PATTERN.sub(r'\1', caption)
    caption = _BOX_PATTERN.sub('', caption)
    return caption.strip()


def merge_caption(existing_content, response):
    """Return the text to write for a caption file.

    *existing_content* is the previous file content to keep (``--exist add``)
    or ``None`` when there is nothing to keep; the new caption is appended on
    the same line, separated by a single space.
    """
    if existing_content is None:
        return response
    return existing_content + " " + response


def get_files_from_directory(directory, image_types, search_subdirectories=False):
    """List image files under *directory*, as paths relative to *directory*.

    Args:
        directory: Directory to scan.
        image_types: Container of accepted lower-case extensions (with dot).
        search_subdirectories: When true, walk the whole tree below
            *directory*; otherwise only its immediate entries are considered.

    Returns:
        A sorted list of paths relative to *directory*. Returning relative
        paths in both modes lets the caller join them with *directory*
        without duplicating the prefix when *directory* is itself relative.
    """
    def is_image(name):
        return os.path.splitext(name)[1].lower() in image_types

    if search_subdirectories:
        found = [
            os.path.relpath(os.path.join(dirpath, name), directory)
            for dirpath, _dirnames, filenames in os.walk(directory)
            for name in filenames
            if is_image(name)
        ]
    else:
        found = [
            name
            for name in os.listdir(directory)
            # os.listdir also returns directories; a folder named "x.png"
            # is not an image.
            if is_image(name) and os.path.isfile(os.path.join(directory, name))
        ]
    # Sorted so that runs are reproducible regardless of filesystem order.
    return sorted(found)


def _report_progress(pbar, index, total, start_time):
    """Advance *pbar* by one and show elapsed time, ETA and throughput."""
    elapsed_time = time.time() - start_time
    done = index + 1
    # Guard both divisions: a coarse clock can report zero elapsed time.
    images_per_sec = done / elapsed_time if elapsed_time > 0 else 0.0
    if images_per_sec > 0:
        estimated_time_remaining = (total - done) / images_per_sec
    else:
        estimated_time_remaining = 0.0
    pbar.set_postfix({"Time Elapsed": f"{elapsed_time:.2f}s", "ETA": f"{estimated_time_remaining:.2f}s", "Speed": f"{images_per_sec:.2f} img/s"})
    pbar.update(1)


def caption_images(imgdir, files, describe, exist="replace", pbar=None):
    """Caption every image in *files* and write the ``.txt`` files.

    Args:
        imgdir: Base directory the entries of *files* are relative to.
        files: Image paths relative to *imgdir*.
        describe: Callable taking the image path and returning the raw
            caption string.
        exist: ``'skip'``, ``'add'`` or ``'replace'`` -- how to treat an
            already existing caption file.
        pbar: Optional progress bar exposing ``update`` and ``set_postfix``.

    Returns:
        The number of caption files written.
    """
    start_time = time.time()
    total = len(files)
    written = 0

    for index, filename in enumerate(files):
        image_path = os.path.join(imgdir, filename)
        txt_path = os.path.join(imgdir, os.path.splitext(filename)[0] + '.txt')

        # Decide what to keep *before* the file is opened for writing: once
        # it has been opened with 'w' it always exists, so checking
        # afterwards cannot tell whether there was a previous caption.
        existing_content = None
        if os.path.exists(txt_path):
            if exist == 'skip':
                if pbar is not None:
                    pbar.update(1)
                continue
            if exist == 'add':
                with open(txt_path, 'r', encoding='utf-8') as f:
                    existing_content = f.read()

        response = describe(image_path)

        # Clean up the caption if necessary
        if has_unwanted_elements(response):
            response = clean_caption(response)

        with open(txt_path, 'w', encoding='utf-8') as f:
            f.write(merge_caption(existing_content, response))
        written += 1

        if pbar is not None:
            _report_progress(pbar, index, total, start_time)

    return written


def _str_to_bool(value):
    """Parse the ``--sub`` value: only the text ``true`` (any case) is True."""
    return str(value).lower() == 'true'


def build_parser():
    """Create the command-line parser for the captioning script."""
    parser = argparse.ArgumentParser(description='Image Captioning Script')
    parser.add_argument('--imgdir', type=str, default='path/to/img/dir', help='Directory containing images')
    parser.add_argument('--exist', type=str, default='replace', choices=['skip', 'add', 'replace'], help='Handling of existing captions')
    parser.add_argument('--prompt', type=str, default='describe this image in detail, in less than 35 words', help='Prompt to use for image captioning')
    parser.add_argument('--sub', type=_str_to_bool, default=False, help='Search for images in subdirectories')
    return parser


def main(argv=None):
    """Parse arguments, load the model and caption the image directory."""
    parser = build_parser()
    args = parser.parse_args(argv)

    # Fail fast, before the model is loaded, when the directory is wrong
    # (the default value is only a placeholder).
    if not os.path.isdir(args.imgdir):
        parser.error(f"--imgdir is not a directory: {args.imgdir!r}")

    files = get_files_from_directory(args.imgdir, image_types, args.sub)

    # Heavy third-party imports are deferred so that --help, argument errors
    # and the helper functions above do not require them.
    import torch  # noqa: F401  (imported by the original script)
    from tqdm import tqdm
    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Initialize the model and tokenizer
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="cuda", trust_remote_code=True, use_flash_attn=True).eval()

    def describe(image_path):
        # Generate the caption using the model
        query = tokenizer.from_list_format([
            {'image': image_path},
            {'text': args.prompt},
        ])
        response, _ = model.chat(tokenizer, query=query, history=None)
        return response

    print("Captioning phase:")
    # The context manager closes the bar even if captioning raises.
    with tqdm(total=len(files), desc="Captioning", dynamic_ncols=True, position=0, leave=True) as pbar:
        caption_images(args.imgdir, files, describe, exist=args.exist, pbar=pbar)


if __name__ == "__main__":
    main()