diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..13566b8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/PodGPT.iml b/.idea/PodGPT.iml
new file mode 100644
index 0000000..bd7cd0d
--- /dev/null
+++ b/.idea/PodGPT.iml
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 0000000..0b1a123
--- /dev/null
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,63 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..2426e6c
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..fb29d16
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/PodGPT.iml" filepath="$PROJECT_DIR$/.idea/PodGPT.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..35eb1dd
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/lib/model_loader_large.py b/lib/model_loader_large.py
index 55c6546..4d65e3d 100644
--- a/lib/model_loader_large.py
+++ b/lib/model_loader_large.py
@@ -98,7 +98,7 @@ def model_loader(config):
lora_alpha=lora_alpha,
lora_dropout=lora_dropout,
bias="none",
- # Please note that the current vLLM is not supporting
+ # Please note that the current vLLM does not support
# the modules "w1", "w2", "w3", and "gate" at this point (June 20, 2024)
target_modules=[
"q_proj", "k_proj", "v_proj", "o_proj"
diff --git a/lib/model_loader_quantization.py b/lib/model_loader_quantization.py
index ac90e96..26109f9 100644
--- a/lib/model_loader_quantization.py
+++ b/lib/model_loader_quantization.py
@@ -8,7 +8,7 @@
import os
from transformers import AutoTokenizer, TrainingArguments
-from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig, get_gptq_peft_model
+from auto_gptq import AutoGPTQForCausalLM, get_gptq_peft_model
from auto_gptq.utils.peft_utils import GPTQLoraConfig
from peft import TaskType
from trl import SFTTrainer
@@ -34,7 +34,7 @@ def model_initializer(config):
model = AutoGPTQForCausalLM.from_quantized(
model_name,
# Since we are using the auto-gptq==0.6.0,
- # We cannot use shard safetensors and here we just use the single 39.8GB single-safetensor checkpoint.
+ # We cannot use sharded safetensors, so here we just use the single 39.8GB safetensors checkpoint.
# https://huggingface.co/shuyuej/Llama-3.3-70B-Instruct-GPTQ/tree/f77c1b3864179c38146f12656804b5b3dfd1e2a2
revision="f77c1b3",
use_safetensors=True,
@@ -51,7 +51,8 @@ def model_initializer(config):
model.warmup_triton()
# https://gist.github.com/eusip/de8fadb761741b56d5d9a6232bf979ed#file-oasst-pythia-12b-05-03-2023-py-L68-L87
- # NOTE: https://github.com/lvwerra/trl/blob/a2749d9e0c96198486b788875eda3b325f76a5c8/examples/sentiment/scripts/gpt-neox-20b_peft/gpt-neo-20b_sentiment_peft.py#L181
+ # https://github.com/lvwerra/trl/blob/a2749d9e0c96198486b788875eda3b325f76a5c8/examples/sentiment/scripts/
+ # gpt-neox-20b_peft/gpt-neo-20b_sentiment_peft.py#L181
for param in model.parameters():
# freeze base model's layers
param.requires_grad = False
diff --git a/main_large.py b/main_large.py
index fb5cd9d..7151be3 100644
--- a/main_large.py
+++ b/main_large.py
@@ -90,4 +90,3 @@ def main(config):
print(yaml.dump(config, default_flow_style=False), '\n\n')
main(config=config)
sys.stdout = sys.__stdout__
-
\ No newline at end of file
diff --git a/main_quantization.py b/main_quantization.py
index 368be8c..fcb1437 100644
--- a/main_quantization.py
+++ b/main_quantization.py
@@ -90,4 +90,3 @@ def main(config):
print(yaml.dump(config, default_flow_style=False), '\n\n')
main(config=config)
sys.stdout = sys.__stdout__
-
\ No newline at end of file
diff --git a/main_small.py b/main_small.py
index cd6ea30..c432572 100644
--- a/main_small.py
+++ b/main_small.py
@@ -96,4 +96,3 @@ def main(config):
print(yaml.dump(config, default_flow_style=False), '\n\n')
main(config=config)
sys.stdout = sys.__stdout__
-
\ No newline at end of file
diff --git a/quantization/quantization.py b/quantization/quantization.py
index 73bc441..4f00b08 100644
--- a/quantization/quantization.py
+++ b/quantization/quantization.py
@@ -25,7 +25,6 @@
####################################################################################
-import time
import os
import logging
import argparse
@@ -109,7 +108,6 @@ def quantization(model_dir, output_dir, quantdataset, bits, group_size, desc_act
raise ValueError(f"Unsupported dtype: {dtype}")
# Load the model with specified quantization settings
- logger.info(f"Loading model from {model_dir} with trust_remote_code={trust_remote_code} and dtype={torch_dtype}")
model = AutoGPTQForCausalLM.from_pretrained(
model_dir,
quantize_config=quantize_config,
@@ -119,15 +117,10 @@ def quantization(model_dir, output_dir, quantdataset, bits, group_size, desc_act
)
# Perform the quantization process
- logger.info(f"Starting quantization to {output_dir} with use_triton={use_triton}")
- start_time = time.time()
model.quantize(quantdataset, use_triton=use_triton, batch_size=batch_size)
- logger.info(f"Time to quantize model at {output_dir} with use_triton={use_triton}: {time.time() - start_time:.2f}")
# Save the quantized model
- logger.info(f"Saving quantized model to {output_dir}")
model.save_quantized(output_dir, use_safetensors=True)
- logger.info("Done.")
def mian(args):
@@ -198,12 +191,12 @@ def mian(args):
logger.error(f"Aborted. Will delete {output_dir}")
os.rmdir(output_dir)
abort = True
- except:
+ except Exception:
raise
finally:
count += 1
else:
- logger.error(f"Aborting - told to stop!")
+ logger.error("Aborting - told to stop!")
break
diff --git a/quantization/quantization_GPTQModel.py b/quantization/quantization_GPTQModel.py
index ea9141a..6e0c737 100644
--- a/quantization/quantization_GPTQModel.py
+++ b/quantization/quantization_GPTQModel.py
@@ -138,15 +138,15 @@ def mian(args):
)
except KeyboardInterrupt:
# Handle user interrupt
logger.error(f"Aborted. Will delete {output_dir}")
os.rmdir(output_dir)
abort = True
- except:
+ except Exception:
raise
finally:
count += 1
else:
- logger.error(f"Aborting - told to stop!")
+ logger.error("Aborting - told to stop!")
break
diff --git a/quantization/quantization_HF.py b/quantization/quantization_HF.py
index 391eaa8..4d192ef 100644
--- a/quantization/quantization_HF.py
+++ b/quantization/quantization_HF.py
@@ -6,12 +6,13 @@
# PodGPT: An Audio-augmented Large Language Model for Research and Education
# Copyright (C) 2024 Kolachalama Laboratory at Boston University
+import os
import argparse
+import json
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig
-from huggingface_hub import login
from utils.utils import load_config
@@ -104,7 +105,7 @@ def main(repo, bits, group_size, act_order, hf_read_token):
"weight_map": {key: "model.safetensors" for key in state_dict.keys()}, # Map all weights to a single file
}
- index_file_path = os.path.join(model_save_path, "model.safetensors.index.json")
+ index_file_path = os.path.join(f"{repo}_{bits}bit", "model.safetensors.index.json")
with open(index_file_path, "w") as f:
json.dump(index, f, indent=2)
print("Saved index file to", index_file_path)
@@ -123,7 +124,7 @@ def main(repo, bits, group_size, act_order, hf_read_token):
# Load the configuration
config = load_config(file_name="config_quantization.yml")
hf_read_token = config.get("hf_read_token")
-
+
# Conduct the GPTQ quantization
main(
config=config,
diff --git a/utils/answer_utils.py b/utils/answer_utils.py
index 654f3b8..e4c6e8f 100644
--- a/utils/answer_utils.py
+++ b/utils/answer_utils.py
@@ -374,7 +374,7 @@ def extract_answer(completion, option_range="a-eA-E"):
re.compile(rf'would be[^{potential_letters}]*\{{([{option_range}])\}}'),
re.compile(rf'would be[^{potential_letters}]*([{option_range}])\)'),
re.compile(rf'would be[^{potential_letters}]*([{option_range}])$'),
-
+
# Matches "is (A)" and similar formats
re.compile(
rf'is[^{potential_letters}]*:+[^{potential_letters}]*\n+[^{potential_letters}]*\(([{option_range}])\)'
@@ -392,7 +392,7 @@ def extract_answer(completion, option_range="a-eA-E"):
rf'is[^{potential_letters}]*:+[^{potential_letters}]*\n+[^{potential_letters}]*([{option_range}])\)'
),
re.compile(rf'is[^{potential_letters}]*\n+[^{potential_letters}]*([{option_range}])\)'),
-
+
# Matches "be (A)" and similar formats
re.compile(rf'is[^{letter_and_num}]+([{option_range}])\)'),
re.compile(rf'be[^{letter_and_num}]+([{option_range}])\)'),
@@ -400,7 +400,7 @@ def extract_answer(completion, option_range="a-eA-E"):
re.compile(rf'[^{letter_and_num}]+([{option_range}])\)[^{potential_letters}]*would'),
re.compile(rf'[^{letter_and_num}]+([{option_range}])\)[^{potential_letters}]*could'),
re.compile(rf'[^{letter_and_num}]+([{option_range}])\)[^{potential_letters}]*will'),
-
+
# Matches "(A)" followed by any other characters
re.compile(rf':+[^{letter_and_num}]*([{option_range}])\)[^{potential_letters}]'),
re.compile(rf':+[^{letter_and_num}]*([{option_range}])\)$'),
@@ -460,7 +460,7 @@ def extract_answer(completion, option_range="a-eA-E"):
additional_patterns = [
# Matches "A"
re.compile(rf"^[^{letter_and_num}]*([{option_range}])[^{letter_and_num}]*$"),
-
+
# Matches "(A) is", "[A] is", "{A} is", and similar formats
re.compile(rf'\(([{option_range}])\)[^{potential_letters}]*is'),
re.compile(rf'\[([{option_range}])\][^{potential_letters}]*is'),
@@ -472,7 +472,7 @@ def extract_answer(completion, option_range="a-eA-E"):
),
re.compile(rf'^([{option_range}])\)[^{potential_letters}]*is'),
re.compile(rf'^([{option_range}])[^{letter_and_num}][^{potential_letters}]*is'),
-
+
# Matches "(A) would", "[A] would", "{A} would", and similar formats
re.compile(rf'\(([{option_range}])\)[^{potential_letters}]*would'),
re.compile(rf'\[([{option_range}])\][^{potential_letters}]*would'),
@@ -484,7 +484,7 @@ def extract_answer(completion, option_range="a-eA-E"):
),
re.compile(rf'^([{option_range}])\)[^{potential_letters}]*would'),
re.compile(rf'^([{option_range}])[^{letter_and_num}][^{potential_letters}]*would'),
-
+
# Matches "(A) could", "[A] could", "{A} could", and similar formats
re.compile(rf'\(([{option_range}])\)[^{potential_letters}]*could'),
re.compile(rf'\[([{option_range}])\][^{potential_letters}]*could'),
@@ -496,7 +496,7 @@ def extract_answer(completion, option_range="a-eA-E"):
),
re.compile(rf'^([{option_range}])\)[^{potential_letters}]*could'),
re.compile(rf'^([{option_range}])[^{letter_and_num}][^{potential_letters}]*could'),
-
+
# Matches "(A) will", "[A] will", "{A} will", and similar formats
re.compile(rf'\(([{option_range}])\)[^{potential_letters}]*will'),
re.compile(rf'\[([{option_range}])\][^{potential_letters}]*will'),
@@ -508,7 +508,7 @@ def extract_answer(completion, option_range="a-eA-E"):
),
re.compile(rf'^([{option_range}])\)[^{potential_letters}]*will'),
re.compile(rf'^([{option_range}])[^{letter_and_num}][^{potential_letters}]*will'),
-
+
# Matches "option: (A)" and similar formats
re.compile(rf'[oO]ption:+[^{potential_letters}]*\(([{option_range}])\)'),
re.compile(rf'[oO]ption:+[^{potential_letters}]*\[([{option_range}])\]'),
@@ -531,7 +531,7 @@ def extract_answer(completion, option_range="a-eA-E"):
rf'{letter_and_num}]'
),
re.compile(rf'[oO]ption:+[^{potential_letters}]*[^{letter_and_num}]([{option_range}])$'),
-
+
# Matches "choice: (A)" and similar formats
re.compile(rf'[cC]hoice:+[^{potential_letters}]*\(([{option_range}])\)'),
re.compile(rf'[cC]hoice:+[^{potential_letters}]*\[([{option_range}])\]'),
@@ -554,7 +554,7 @@ def extract_answer(completion, option_range="a-eA-E"):
rf'{letter_and_num}]'
),
re.compile(rf'[cC]hoice:+[^{potential_letters}]*[^{letter_and_num}]([{option_range}])$'),
-
+
# Matches "answer: (A)" and similar formats
re.compile(rf' is[^{potential_letters}]+\(([{option_range}])\)[^{potential_letters}]'),
re.compile(rf' is[^{potential_letters}]+\[([{option_range}])\][^{potential_letters}]'),
@@ -580,7 +580,7 @@ def extract_answer(completion, option_range="a-eA-E"):
re.compile(rf' is[^{potential_letters}]+\{{([{option_range}])\}}'),
re.compile(rf' is[^{potential_letters}]*[^{letter_and_num}]([{option_range}])\)'),
re.compile(rf' is[^{letter_and_num}]*([{option_range}])\)'),
-
+
# Matches "choice (A)" and similar formats
re.compile(rf'[cC]hoice[^{potential_letters}]*\(([{option_range}])\)'),
re.compile(rf'[cC]hoice[^{potential_letters}]*\[([{option_range}])\]'),
@@ -603,7 +603,7 @@ def extract_answer(completion, option_range="a-eA-E"):
rf'{letter_and_num}]'
),
re.compile(rf'[cC]hoice[^{potential_letters}]*[^{letter_and_num}]([{option_range}])$'),
-
+
# Matches "answer (A)" and similar formats
re.compile(rf'[aA]nswer[^{potential_letters}]*\(([{option_range}])\)'),
re.compile(rf'[aA]nswer[^{potential_letters}]*\[([{option_range}])\]'),
@@ -625,7 +625,7 @@ def extract_answer(completion, option_range="a-eA-E"):
rf'{letter_and_num}]'
),
re.compile(rf'[aA]nswer[^{potential_letters}]*[^{letter_and_num}]([{option_range}])$'),
-
+
# Matches "option (A)" and similar formats
re.compile(rf'[Oo]ption[^{potential_letters}]*\(([{option_range}])\)'),
re.compile(rf'[Oo]ption[^{potential_letters}]*\[([{option_range}])\]'),
diff --git a/utils/eval_utils.py b/utils/eval_utils.py
index eb564d4..382a46c 100644
--- a/utils/eval_utils.py
+++ b/utils/eval_utils.py
@@ -178,18 +178,18 @@ def performance_eval(config, mode, prompts, answers, file_path):
sampling_params,
lora_request=LoRARequest("adapter", 1, lora_path)
)
-
+
for i, output in enumerate(completions):
temp_gen = output.outputs[0].text
responses.append(temp_gen)
print('Successfully finished generating', len(prompts), 'samples!')
# Evaluating the smaller models
- # Please take a look at the above quantization codes if you are using a quantized model.
+ # Please take a look at the above quantization code if you are using a quantized model.
elif mode == "small":
num_gpus_vllm = config.get("num_gpus_vllm")
gpu_utilization_vllm = config.get("gpu_utilization_vllm")
-
+
stop_tokens = stop_token_list()
# https://github.com/vllm-project/vllm/blob/main/vllm/sampling_params.py#L38-L66
sampling_params = SamplingParams(
diff --git a/utils/utils.py b/utils/utils.py
index 92d6bff..92c2f58 100644
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -210,7 +210,6 @@ def prompt_template_MMedLM(input=None, language="English"):
else:
question = input.split("\nA.")[0]
options = "\nA." + input.split("\nA.")[1]
-
options = options.replace(english_prompt, "")
options = options.replace(hindi_prompt, "")
options = options.replace(spanish_prompt, "")