From db7dee45bc042d2200287cdddc5707d5ea71399e Mon Sep 17 00:00:00 2001
From: SuperBruceJia
Date: Mon, 16 Dec 2024 14:38:24 -0500
Subject: [PATCH] update

---
 .idea/.gitignore                               |  8 +++
 .idea/PodGPT.iml                               | 12 ++++
 .idea/inspectionProfiles/Project_Default.xml   | 63 +++++++++++++++++++
 .../inspectionProfiles/profiles_settings.xml   |  6 ++
 .idea/misc.xml                                 |  7 +++
 .idea/modules.xml                              |  8 +++
 .idea/vcs.xml                                  |  6 ++
 lib/model_loader_large.py                      |  2 +-
 lib/model_loader_quantization.py               |  7 ++-
 main_large.py                                  |  1 -
 main_quantization.py                           |  1 -
 main_small.py                                  |  1 -
 quantization/quantization.py                   | 11 +---
 quantization/quantization_GPTQModel.py         |  6 +-
 quantization/quantization_HF.py                |  7 ++-
 utils/answer_utils.py                          | 26 ++++----
 utils/eval_utils.py                            |  6 +-
 utils/utils.py                                 |  1 -
 18 files changed, 140 insertions(+), 39 deletions(-)
 create mode 100644 .idea/.gitignore
 create mode 100644 .idea/PodGPT.iml
 create mode 100644 .idea/inspectionProfiles/Project_Default.xml
 create mode 100644 .idea/inspectionProfiles/profiles_settings.xml
 create mode 100644 .idea/misc.xml
 create mode 100644 .idea/modules.xml
 create mode 100644 .idea/vcs.xml

diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..13566b8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/PodGPT.iml b/.idea/PodGPT.iml
new file mode 100644
index 0000000..bd7cd0d
--- /dev/null
+++ b/.idea/PodGPT.iml
@@ -0,0 +1,12 @@
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 0000000..0b1a123
--- /dev/null
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,63 @@
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..2426e6c
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,7 @@
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..fb29d16
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..35eb1dd
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
diff --git a/lib/model_loader_large.py b/lib/model_loader_large.py
index 55c6546..4d65e3d 100644
--- a/lib/model_loader_large.py
+++ b/lib/model_loader_large.py
@@ -98,7 +98,7 @@ def model_loader(config):
         lora_alpha=lora_alpha,
         lora_dropout=lora_dropout,
         bias="none",
-        # Please note that the current vLLM is not supporting
+        # Please note that the current vLLM is not supporting
         # the modules "w1", "w2", "w3", and "gate" at this point (June 20, 2024)
         target_modules=[
             "q_proj", "k_proj", "v_proj", "o_proj"
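The lib/model_loader_large.py hunk above only re-wraps a comment inside the LoRA configuration, but the comment records a real constraint: at that time vLLM could not serve adapters on the "w1", "w2", "w3", and "gate" modules. A minimal sketch of a PEFT LoRA config restricted to the attention projections; the rank, alpha, and dropout values are illustrative assumptions, not PodGPT's settings.

from peft import LoraConfig

# Sketch only: keep LoRA on the attention projections so the resulting
# adapter stays servable by vLLM (values below are assumed, not PodGPT's).
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)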
diff --git a/lib/model_loader_quantization.py b/lib/model_loader_quantization.py
index ac90e96..26109f9 100644
--- a/lib/model_loader_quantization.py
+++ b/lib/model_loader_quantization.py
@@ -8,7 +8,7 @@
 import os
 
 from transformers import AutoTokenizer, TrainingArguments
-from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig, get_gptq_peft_model
+from auto_gptq import AutoGPTQForCausalLM, get_gptq_peft_model
 from auto_gptq.utils.peft_utils import GPTQLoraConfig
 from peft import TaskType
 from trl import SFTTrainer
@@ -34,7 +34,7 @@ def model_initializer(config):
     model = AutoGPTQForCausalLM.from_quantized(
         model_name,
         # Since we are using the auto-gptq==0.6.0,
-        # We cannot use shard safetensors and here we just use the single 39.8GB single-safetensor checkpoint.
+        # We cannot use sharded safetensors, so here we just use the single 39.8GB safetensors checkpoint.
         # https://huggingface.co/shuyuej/Llama-3.3-70B-Instruct-GPTQ/tree/f77c1b3864179c38146f12656804b5b3dfd1e2a2
         revision="f77c1b3",
         use_safetensors=True,
@@ -51,7 +51,8 @@ def model_initializer(config):
         model.warmup_triton()
 
     # https://gist.github.com/eusip/de8fadb761741b56d5d9a6232bf979ed#file-oasst-pythia-12b-05-03-2023-py-L68-L87
-    # NOTE: https://github.com/lvwerra/trl/blob/a2749d9e0c96198486b788875eda3b325f76a5c8/examples/sentiment/scripts/gpt-neox-20b_peft/gpt-neo-20b_sentiment_peft.py#L181
+    # https://github.com/lvwerra/trl/blob/a2749d9e0c96198486b788875eda3b325f76a5c8/examples/sentiment/scripts/
+    # gpt-neox-20b_peft/gpt-neo-20b_sentiment_peft.py#L181
     for param in model.parameters():
         # freeze base model's layers
         param.requires_grad = False
diff --git a/main_large.py b/main_large.py
index fb5cd9d..7151be3 100644
--- a/main_large.py
+++ b/main_large.py
@@ -90,4 +90,3 @@ def main(config):
     print(yaml.dump(config, default_flow_style=False), '\n\n')
     main(config=config)
     sys.stdout = sys.__stdout__
-    
\ No newline at end of file
diff --git a/main_quantization.py b/main_quantization.py
index 368be8c..fcb1437 100644
--- a/main_quantization.py
+++ b/main_quantization.py
@@ -90,4 +90,3 @@ def main(config):
     print(yaml.dump(config, default_flow_style=False), '\n\n')
     main(config=config)
     sys.stdout = sys.__stdout__
-    
\ No newline at end of file
diff --git a/main_small.py b/main_small.py
index cd6ea30..c432572 100644
--- a/main_small.py
+++ b/main_small.py
@@ -96,4 +96,3 @@ def main(config):
     print(yaml.dump(config, default_flow_style=False), '\n\n')
     main(config=config)
     sys.stdout = sys.__stdout__
-    
\ No newline at end of file
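The lib/model_loader_quantization.py hunks above pin a single-file GPTQ checkpoint revision and freeze the quantized base weights before adapter training. A minimal sketch of that load-then-adapt flow under auto-gptq 0.6.0; the adapter hyperparameters are illustrative assumptions, and this is not the full PodGPT loader.

from auto_gptq import AutoGPTQForCausalLM, get_gptq_peft_model
from auto_gptq.utils.peft_utils import GPTQLoraConfig
from peft import TaskType

# Load a pre-quantized GPTQ checkpoint at a pinned revision (single safetensors file).
model = AutoGPTQForCausalLM.from_quantized(
    "shuyuej/Llama-3.3-70B-Instruct-GPTQ",
    revision="f77c1b3",
    use_safetensors=True,
    device_map="auto",
)

# Freeze the quantized base weights; only the LoRA adapter is trained.
for param in model.parameters():
    param.requires_grad = False

# Attach a GPTQ-aware LoRA adapter (rank/alpha/dropout are assumed values).
peft_config = GPTQLoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)
model = get_gptq_peft_model(model, peft_config=peft_config, train_mode=True)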
diff --git a/quantization/quantization.py b/quantization/quantization.py
index 73bc441..4f00b08 100644
--- a/quantization/quantization.py
+++ b/quantization/quantization.py
@@ -25,7 +25,6 @@
 ####################################################################################
 
-import time
 import os
 import logging
 import argparse
@@ -109,7 +108,6 @@ def quantization(model_dir, output_dir, quantdataset, bits, group_size, desc_act
         raise ValueError(f"Unsupported dtype: {dtype}")
 
     # Load the model with specified quantization settings
-    logger.info(f"Loading model from {model_dir} with trust_remote_code={trust_remote_code} and dtype={torch_dtype}")
     model = AutoGPTQForCausalLM.from_pretrained(
         model_dir,
         quantize_config=quantize_config,
@@ -119,15 +117,10 @@ def quantization(model_dir, output_dir, quantdataset, bits, group_size, desc_act
     )
 
     # Perform the quantization process
-    logger.info(f"Starting quantization to {output_dir} with use_triton={use_triton}")
-    start_time = time.time()
     model.quantize(quantdataset, use_triton=use_triton, batch_size=batch_size)
-    logger.info(f"Time to quantize model at {output_dir} with use_triton={use_triton}: {time.time() - start_time:.2f}")
 
     # Save the quantized model
-    logger.info(f"Saving quantized model to {output_dir}")
     model.save_quantized(output_dir, use_safetensors=True)
-    logger.info("Done.")
 
 
 def mian(args):
@@ -198,12 +191,12 @@ def mian(args):
             logger.error(f"Aborted. Will delete {output_dir}")
             os.rmdir(output_dir)
             abort = True
-        except:
+        except Exception:
             raise
         finally:
             count += 1
     else:
-        logger.error(f"Aborting - told to stop!")
+        logger.error("Aborting - told to stop!")
         break
diff --git a/quantization/quantization_GPTQModel.py b/quantization/quantization_GPTQModel.py
index ea9141a..6e0c737 100644
--- a/quantization/quantization_GPTQModel.py
+++ b/quantization/quantization_GPTQModel.py
@@ -138,15 +138,15 @@ def mian(args):
             )
         except KeyboardInterrupt:
             # Handle user interrupt
             logger.error(f"Aborted. Will delete {output_dir}")
             os.rmdir(output_dir)
             abort = True
-        except:
+        except Exception:
             raise
         finally:
             count += 1
     else:
-        logger.error(f"Aborting - told to stop!")
+        logger.error("Aborting - told to stop!")
         break
diff --git a/quantization/quantization_HF.py b/quantization/quantization_HF.py
index 391eaa8..4d192ef 100644
--- a/quantization/quantization_HF.py
+++ b/quantization/quantization_HF.py
@@ -6,12 +6,13 @@
 # PodGPT: An Audio-augmented Large Language Model for Research and Education
 # Copyright (C) 2024 Kolachalama Laboratory at Boston University
 
+import os
 import argparse
+import json
 
 import torch
 from datasets import load_dataset
 from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig
-from huggingface_hub import login
 
 from utils.utils import load_config
@@ -104,7 +105,7 @@ def main(repo, bits, group_size, act_order, hf_read_token):
         "weight_map": {key: "model.safetensors" for key in state_dict.keys()},  # Map all weights to a single file
     }
-    index_file_path = os.path.join(model_save_path, "model.safetensors.index.json")
+    index_file_path = os.path.join(f"{repo}_{bits}bit", "model.safetensors.index.json")
     with open(index_file_path, "w") as f:
         json.dump(index, f, indent=2)
     print("Saved index file to", index_file_path)
@@ -123,7 +124,7 @@
     # Load the configuration
     config = load_config(file_name="config_quantization.yml")
    hf_read_token = config.get("hf_read_token")
-    
+
     # Conduct the GPTQ quantization
     main(
         config=config,
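The three quantization scripts above share the same AutoGPTQ flow: build a quantize config, load the full-precision checkpoint, calibrate on a small dataset, and save safetensors. A minimal sketch of that flow with assumed paths, bit width, and a toy calibration set; the real scripts take these values from command-line arguments and the YAML config.

from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
from transformers import AutoTokenizer

model_dir = "meta-llama/Llama-3.1-8B-Instruct"   # assumed source checkpoint
output_dir = "./llama-3.1-8b-gptq-4bit"          # assumed output directory

tokenizer = AutoTokenizer.from_pretrained(model_dir, use_fast=True)

# Toy calibration set; real runs use a few hundred representative samples.
quantdataset = [
    tokenizer("GPTQ calibrates each layer on sample activations.", return_tensors="pt")
]

quantize_config = BaseQuantizeConfig(
    bits=4,          # assumed
    group_size=128,  # assumed
    desc_act=True,   # assumed (activation ordering)
)

model = AutoGPTQForCausalLM.from_pretrained(
    model_dir,
    quantize_config=quantize_config,
)

# Run GPTQ calibration, then write the quantized weights as safetensors.
model.quantize(quantdataset, use_triton=False, batch_size=1)
model.save_quantized(output_dir, use_safetensors=True)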
diff --git a/utils/answer_utils.py b/utils/answer_utils.py
index 654f3b8..e4c6e8f 100644
--- a/utils/answer_utils.py
+++ b/utils/answer_utils.py
@@ -374,7 +374,7 @@ def extract_answer(completion, option_range="a-eA-E"):
         re.compile(rf'would be[^{potential_letters}]*\{{([{option_range}])\}}'),
         re.compile(rf'would be[^{potential_letters}]*([{option_range}])\)'),
         re.compile(rf'would be[^{potential_letters}]*([{option_range}])$'),
-        
+
         # Matches "is (A)" and similar formats
         re.compile(
             rf'is[^{potential_letters}]*:+[^{potential_letters}]*\n+[^{potential_letters}]*\(([{option_range}])\)'
         ),
@@ -392,7 +392,7 @@ def extract_answer(completion, option_range="a-eA-E"):
             rf'is[^{potential_letters}]*:+[^{potential_letters}]*\n+[^{potential_letters}]*([{option_range}])\)'
         ),
         re.compile(rf'is[^{potential_letters}]*\n+[^{potential_letters}]*([{option_range}])\)'),
-        
+
         # Matches "be (A)" and similar formats
         re.compile(rf'is[^{letter_and_num}]+([{option_range}])\)'),
         re.compile(rf'be[^{letter_and_num}]+([{option_range}])\)'),
@@ -400,7 +400,7 @@ def extract_answer(completion, option_range="a-eA-E"):
         re.compile(rf'[^{letter_and_num}]+([{option_range}])\)[^{potential_letters}]*would'),
         re.compile(rf'[^{letter_and_num}]+([{option_range}])\)[^{potential_letters}]*could'),
         re.compile(rf'[^{letter_and_num}]+([{option_range}])\)[^{potential_letters}]*will'),
-        
+
         # Matches "(A)" followed by any other characters
         re.compile(rf':+[^{letter_and_num}]*([{option_range}])\)[^{potential_letters}]'),
         re.compile(rf':+[^{letter_and_num}]*([{option_range}])\)$'),
@@ -460,7 +460,7 @@ def extract_answer(completion, option_range="a-eA-E"):
     additional_patterns = [
         # Matches "A"
         re.compile(rf"^[^{letter_and_num}]*([{option_range}])[^{letter_and_num}]*$"),
-        
+
         # Matches "(A) is", "[A] is", "{A} is", and similar formats
         re.compile(rf'\(([{option_range}])\)[^{potential_letters}]*is'),
         re.compile(rf'\[([{option_range}])\][^{potential_letters}]*is'),
@@ -472,7 +472,7 @@ def extract_answer(completion, option_range="a-eA-E"):
         ),
         re.compile(rf'^([{option_range}])\)[^{potential_letters}]*is'),
         re.compile(rf'^([{option_range}])[^{letter_and_num}][^{potential_letters}]*is'),
-        
+
         # Matches "(A) would", "[A] would", "{A} would", and similar formats
         re.compile(rf'\(([{option_range}])\)[^{potential_letters}]*would'),
         re.compile(rf'\[([{option_range}])\][^{potential_letters}]*would'),
@@ -484,7 +484,7 @@ def extract_answer(completion, option_range="a-eA-E"):
         ),
         re.compile(rf'^([{option_range}])\)[^{potential_letters}]*would'),
         re.compile(rf'^([{option_range}])[^{letter_and_num}][^{potential_letters}]*would'),
-        
+
         # Matches "(A) could", "[A] could", "{A} could", and similar formats
         re.compile(rf'\(([{option_range}])\)[^{potential_letters}]*could'),
         re.compile(rf'\[([{option_range}])\][^{potential_letters}]*could'),
@@ -496,7 +496,7 @@ def extract_answer(completion, option_range="a-eA-E"):
         ),
         re.compile(rf'^([{option_range}])\)[^{potential_letters}]*could'),
         re.compile(rf'^([{option_range}])[^{letter_and_num}][^{potential_letters}]*could'),
-        
+
         # Matches "(A) will", "[A] will", "{A} will", and similar formats
         re.compile(rf'\(([{option_range}])\)[^{potential_letters}]*will'),
         re.compile(rf'\[([{option_range}])\][^{potential_letters}]*will'),
@@ -508,7 +508,7 @@ def extract_answer(completion, option_range="a-eA-E"):
         ),
         re.compile(rf'^([{option_range}])\)[^{potential_letters}]*will'),
         re.compile(rf'^([{option_range}])[^{letter_and_num}][^{potential_letters}]*will'),
-        
+
         # Matches "option: (A)" and similar formats
         re.compile(rf'[oO]ption:+[^{potential_letters}]*\(([{option_range}])\)'),
         re.compile(rf'[oO]ption:+[^{potential_letters}]*\[([{option_range}])\]'),
@@ -531,7 +531,7 @@ def extract_answer(completion, option_range="a-eA-E"):
             rf'{letter_and_num}]'
         ),
         re.compile(rf'[oO]ption:+[^{potential_letters}]*[^{letter_and_num}]([{option_range}])$'),
-        
+
         # Matches "choice: (A)" and similar formats
         re.compile(rf'[cC]hoice:+[^{potential_letters}]*\(([{option_range}])\)'),
         re.compile(rf'[cC]hoice:+[^{potential_letters}]*\[([{option_range}])\]'),
@@ -554,7 +554,7 @@ def extract_answer(completion, option_range="a-eA-E"):
             rf'{letter_and_num}]'
         ),
         re.compile(rf'[cC]hoice:+[^{potential_letters}]*[^{letter_and_num}]([{option_range}])$'),
-        
+
         # Matches "answer: (A)" and similar formats
         re.compile(rf' is[^{potential_letters}]+\(([{option_range}])\)[^{potential_letters}]'),
         re.compile(rf' is[^{potential_letters}]+\[([{option_range}])\][^{potential_letters}]'),
@@ -580,7 +580,7 @@ def extract_answer(completion, option_range="a-eA-E"):
         re.compile(rf' is[^{potential_letters}]+\{{([{option_range}])\}}'),
         re.compile(rf' is[^{potential_letters}]*[^{letter_and_num}]([{option_range}])\)'),
         re.compile(rf' is[^{letter_and_num}]*([{option_range}])\)'),
-        
+
         # Matches "choice (A)" and similar formats
         re.compile(rf'[cC]hoice[^{potential_letters}]*\(([{option_range}])\)'),
         re.compile(rf'[cC]hoice[^{potential_letters}]*\[([{option_range}])\]'),
@@ -603,7 +603,7 @@ def extract_answer(completion, option_range="a-eA-E"):
             rf'{letter_and_num}]'
         ),
         re.compile(rf'[cC]hoice[^{potential_letters}]*[^{letter_and_num}]([{option_range}])$'),
-        
+
         # Matches "answer (A)" and similar formats
         re.compile(rf'[aA]nswer[^{potential_letters}]*\(([{option_range}])\)'),
         re.compile(rf'[aA]nswer[^{potential_letters}]*\[([{option_range}])\]'),
@@ -625,7 +625,7 @@ def extract_answer(completion, option_range="a-eA-E"):
             rf'{letter_and_num}]'
         ),
         re.compile(rf'[aA]nswer[^{potential_letters}]*[^{letter_and_num}]([{option_range}])$'),
-        
+
         # Matches "option (A)" and similar formats
         re.compile(rf'[Oo]ption[^{potential_letters}]*\(([{option_range}])\)'),
         re.compile(rf'[Oo]ption[^{potential_letters}]*\[([{option_range}])\]'),
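The utils/answer_utils.py hunks above only strip trailing whitespace between pattern groups, but those pattern lists are the heart of answer extraction: progressively looser regexes are tried until one captures an option letter. A small self-contained sketch of the idea; the two patterns and the helper name are illustrative assumptions, not the PodGPT pattern set.

import re

def extract_option_letter(completion, option_range="a-eA-E"):
    # Illustrative subset: "the answer is (B)", then a bare letter on its own line.
    patterns = [
        re.compile(rf'answer is[^\w]*\(([{option_range}])\)'),
        re.compile(rf'^\s*([{option_range}])\s*$', re.MULTILINE),
    ]
    for pattern in patterns:
        match = pattern.search(completion)
        if match:
            return match.group(1).upper()
    return None

print(extract_option_letter("Therefore, the answer is (b)."))  # prints: B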
diff --git a/utils/eval_utils.py b/utils/eval_utils.py
index eb564d4..382a46c 100644
--- a/utils/eval_utils.py
+++ b/utils/eval_utils.py
@@ -178,18 +178,18 @@ def performance_eval(config, mode, prompts, answers, file_path):
             sampling_params,
             lora_request=LoRARequest("adapter", 1, lora_path)
         )
-        
+
         for i, output in enumerate(completions):
             temp_gen = output.outputs[0].text
             responses.append(temp_gen)
         print('Successfully finished generating', len(prompts), 'samples!')
 
     # Evaluating the smaller models
-    # Please take a look at the above quantization codes if you are using a quantized model.
+    # Please take a look at the above quantization codes if you are using a quantized model.
     elif mode == "small":
         num_gpus_vllm = config.get("num_gpus_vllm")
         gpu_utilization_vllm = config.get("gpu_utilization_vllm")
-        
+
         stop_tokens = stop_token_list()
         # https://github.com/vllm-project/vllm/blob/main/vllm/sampling_params.py#L38-L66
         sampling_params = SamplingParams(
diff --git a/utils/utils.py b/utils/utils.py
index 92d6bff..92c2f58 100644
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -210,7 +210,6 @@ def prompt_template_MMedLM(input=None, language="English"):
     else:
         question = input.split("\nA.")[0]
         options = "\nA." + input.split("\nA.")[1]
-        options = options.replace(english_prompt, "")
         options = options.replace(hindi_prompt, "")
         options = options.replace(spanish_prompt, "")
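For context on the utils/eval_utils.py path above, a minimal sketch of vLLM generation with custom stop tokens and a LoRA adapter; the model name, adapter path, and sampling values are assumptions rather than PodGPT's actual configuration.

from vllm import LLM, SamplingParams
from vllm.lora.request import LoRARequest

stop_tokens = ["</s>"]  # assumed; PodGPT builds this via stop_token_list()
sampling_params = SamplingParams(
    temperature=0.0,
    top_p=1.0,
    max_tokens=1024,
    stop=stop_tokens,
)

llm = LLM(
    model="meta-llama/Llama-3.1-8B-Instruct",  # assumed base checkpoint
    tensor_parallel_size=1,                    # num_gpus_vllm in the config
    gpu_memory_utilization=0.9,                # gpu_utilization_vllm in the config
    enable_lora=True,
)

prompts = ["Question: ...\nA. ...\nB. ...\nAnswer:"]
completions = llm.generate(
    prompts,
    sampling_params,
    lora_request=LoRARequest("adapter", 1, "./lora_adapter"),  # assumed adapter path
)
for output in completions:
    print(output.outputs[0].text)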