diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..13566b8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/PodGPT.iml b/.idea/PodGPT.iml
new file mode 100644
index 0000000..bd7cd0d
--- /dev/null
+++ b/.idea/PodGPT.iml
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 0000000..0b1a123
--- /dev/null
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,63 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..2426e6c
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..fb29d16
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/PodGPT.iml" filepath="$PROJECT_DIR$/.idea/PodGPT.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..35eb1dd
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/lib/model_loader_large.py b/lib/model_loader_large.py
index 55c6546..4d65e3d 100644
--- a/lib/model_loader_large.py
+++ b/lib/model_loader_large.py
@@ -98,7 +98,7 @@ def model_loader(config):
lora_alpha=lora_alpha,
lora_dropout=lora_dropout,
bias="none",
- # Please note that the current vLLM is not supporting
+ # Please note that the current vLLM does not support
# the modules "w1", "w2", "w3", and "gate" at this point (June 20, 2024)
target_modules=[
"q_proj", "k_proj", "v_proj", "o_proj"
diff --git a/lib/model_loader_quantization.py b/lib/model_loader_quantization.py
index ac90e96..26109f9 100644
--- a/lib/model_loader_quantization.py
+++ b/lib/model_loader_quantization.py
@@ -8,7 +8,7 @@
import os
from transformers import AutoTokenizer, TrainingArguments
-from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig, get_gptq_peft_model
+from auto_gptq import AutoGPTQForCausalLM, get_gptq_peft_model
from auto_gptq.utils.peft_utils import GPTQLoraConfig
from peft import TaskType
from trl import SFTTrainer
@@ -34,7 +34,7 @@ def model_initializer(config):
model = AutoGPTQForCausalLM.from_quantized(
model_name,
# Since we are using the auto-gptq==0.6.0,
- # We cannot use shard safetensors and here we just use the single 39.8GB single-safetensor checkpoint.
+ # We cannot use sharded safetensors, so here we just use the single 39.8GB safetensors checkpoint.
# https://huggingface.co/shuyuej/Llama-3.3-70B-Instruct-GPTQ/tree/f77c1b3864179c38146f12656804b5b3dfd1e2a2
revision="f77c1b3",
use_safetensors=True,
@@ -51,7 +51,8 @@ def model_initializer(config):
model.warmup_triton()
# https://gist.github.com/eusip/de8fadb761741b56d5d9a6232bf979ed#file-oasst-pythia-12b-05-03-2023-py-L68-L87
- # NOTE: https://github.com/lvwerra/trl/blob/a2749d9e0c96198486b788875eda3b325f76a5c8/examples/sentiment/scripts/gpt-neox-20b_peft/gpt-neo-20b_sentiment_peft.py#L181
+ # https://github.com/lvwerra/trl/blob/a2749d9e0c96198486b788875eda3b325f76a5c8/examples/sentiment/scripts/
+ # gpt-neox-20b_peft/gpt-neo-20b_sentiment_peft.py#L181
for param in model.parameters():
# freeze base model's layers
param.requires_grad = False
diff --git a/main_large.py b/main_large.py
index fb5cd9d..7151be3 100644
--- a/main_large.py
+++ b/main_large.py
@@ -90,4 +90,3 @@ def main(config):
print(yaml.dump(config, default_flow_style=False), '\n\n')
main(config=config)
sys.stdout = sys.__stdout__
-
\ No newline at end of file
diff --git a/main_quantization.py b/main_quantization.py
index 368be8c..fcb1437 100644
--- a/main_quantization.py
+++ b/main_quantization.py
@@ -90,4 +90,3 @@ def main(config):
print(yaml.dump(config, default_flow_style=False), '\n\n')
main(config=config)
sys.stdout = sys.__stdout__
-
\ No newline at end of file
diff --git a/main_small.py b/main_small.py
index cd6ea30..c432572 100644
--- a/main_small.py
+++ b/main_small.py
@@ -96,4 +96,3 @@ def main(config):
print(yaml.dump(config, default_flow_style=False), '\n\n')
main(config=config)
sys.stdout = sys.__stdout__
-
\ No newline at end of file
diff --git a/quantization/quantization.py b/quantization/quantization.py
index 73bc441..4f00b08 100644
--- a/quantization/quantization.py
+++ b/quantization/quantization.py
@@ -25,7 +25,6 @@
####################################################################################
-import time
import os
import logging
import argparse
@@ -109,7 +108,6 @@ def quantization(model_dir, output_dir, quantdataset, bits, group_size, desc_act
raise ValueError(f"Unsupported dtype: {dtype}")
# Load the model with specified quantization settings
- logger.info(f"Loading model from {model_dir} with trust_remote_code={trust_remote_code} and dtype={torch_dtype}")
model = AutoGPTQForCausalLM.from_pretrained(
model_dir,
quantize_config=quantize_config,
@@ -119,15 +117,10 @@ def quantization(model_dir, output_dir, quantdataset, bits, group_size, desc_act
)
# Perform the quantization process
- logger.info(f"Starting quantization to {output_dir} with use_triton={use_triton}")
- start_time = time.time()
model.quantize(quantdataset, use_triton=use_triton, batch_size=batch_size)
- logger.info(f"Time to quantize model at {output_dir} with use_triton={use_triton}: {time.time() - start_time:.2f}")
# Save the quantized model
- logger.info(f"Saving quantized model to {output_dir}")
model.save_quantized(output_dir, use_safetensors=True)
- logger.info("Done.")
def mian(args):
@@ -198,12 +191,12 @@ def mian(args):
logger.error(f"Aborted. Will delete {output_dir}")
os.rmdir(output_dir)
abort = True
- except:
+ except Exception:
raise
finally:
count += 1
else:
- logger.error(f"Aborting - told to stop!")
+ logger.error("Aborting - told to stop!")
break
diff --git a/quantization/quantization_GPTQModel.py b/quantization/quantization_GPTQModel.py
index ea9141a..6e0c737 100644
--- a/quantization/quantization_GPTQModel.py
+++ b/quantization/quantization_GPTQModel.py
@@ -138,15 +138,15 @@ def mian(args):
)
except KeyboardInterrupt:
# Handle user interrupt
logger.error(f"Aborted. Will delete {output_dir}")
os.rmdir(output_dir)
abort = True
- except:
+ except Exception:
raise
finally:
count += 1
else:
- logger.error(f"Aborting - told to stop!")
+ logger.error("Aborting - told to stop!")
break
diff --git a/quantization/quantization_HF.py b/quantization/quantization_HF.py
index 391eaa8..4d192ef 100644
--- a/quantization/quantization_HF.py
+++ b/quantization/quantization_HF.py
@@ -6,12 +6,13 @@
# PodGPT: An Audio-augmented Large Language Model for Research and Education
# Copyright (C) 2024 Kolachalama Laboratory at Boston University
+import os
import argparse
+import json
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig
-from huggingface_hub import login
from utils.utils import load_config
@@ -104,7 +105,7 @@ def main(repo, bits, group_size, act_order, hf_read_token):
"weight_map": {key: "model.safetensors" for key in state_dict.keys()}, # Map all weights to a single file
}
- index_file_path = os.path.join(model_save_path, "model.safetensors.index.json")
+ index_file_path = os.path.join(f"{repo}_{bits}bit", "model.safetensors.index.json")
with open(index_file_path, "w") as f:
json.dump(index, f, indent=2)
print("Saved index file to", index_file_path)
@@ -123,7 +124,7 @@ def main(repo, bits, group_size, act_order, hf_read_token):
# Load the configuration
config = load_config(file_name="config_quantization.yml")
hf_read_token = config.get("hf_read_token")
-
+
# Conduct the GPTQ quantization
main(
config=config,
diff --git a/utils/answer_utils.py b/utils/answer_utils.py
index 654f3b8..e4c6e8f 100644
--- a/utils/answer_utils.py
+++ b/utils/answer_utils.py
@@ -374,7 +374,7 @@ def extract_answer(completion, option_range="a-eA-E"):
re.compile(rf'would be[^{potential_letters}]*\{{([{option_range}])\}}'),
re.compile(rf'would be[^{potential_letters}]*([{option_range}])\)'),
re.compile(rf'would be[^{potential_letters}]*([{option_range}])$'),
-
+
# Matches "is (A)" and similar formats
re.compile(
rf'is[^{potential_letters}]*:+[^{potential_letters}]*\n+[^{potential_letters}]*\(([{option_range}])\)'
@@ -392,7 +392,7 @@ def extract_answer(completion, option_range="a-eA-E"):
rf'is[^{potential_letters}]*:+[^{potential_letters}]*\n+[^{potential_letters}]*([{option_range}])\)'
),
re.compile(rf'is[^{potential_letters}]*\n+[^{potential_letters}]*([{option_range}])\)'),
-
+
# Matches "be (A)" and similar formats
re.compile(rf'is[^{letter_and_num}]+([{option_range}])\)'),
re.compile(rf'be[^{letter_and_num}]+([{option_range}])\)'),
@@ -400,7 +400,7 @@ def extract_answer(completion, option_range="a-eA-E"):
re.compile(rf'[^{letter_and_num}]+([{option_range}])\)[^{potential_letters}]*would'),
re.compile(rf'[^{letter_and_num}]+([{option_range}])\)[^{potential_letters}]*could'),
re.compile(rf'[^{letter_and_num}]+([{option_range}])\)[^{potential_letters}]*will'),
-
+
# Matches "(A)" followed by any other characters
re.compile(rf':+[^{letter_and_num}]*([{option_range}])\)[^{potential_letters}]'),
re.compile(rf':+[^{letter_and_num}]*([{option_range}])\)$'),
@@ -460,7 +460,7 @@ def extract_answer(completion, option_range="a-eA-E"):
additional_patterns = [
# Matches "A"
re.compile(rf"^[^{letter_and_num}]*([{option_range}])[^{letter_and_num}]*$"),
-
+
# Matches "(A) is", "[A] is", "{A} is", and similar formats
re.compile(rf'\(([{option_range}])\)[^{potential_letters}]*is'),
re.compile(rf'\[([{option_range}])\][^{potential_letters}]*is'),
@@ -472,7 +472,7 @@ def extract_answer(completion, option_range="a-eA-E"):
),
re.compile(rf'^([{option_range}])\)[^{potential_letters}]*is'),
re.compile(rf'^([{option_range}])[^{letter_and_num}][^{potential_letters}]*is'),
-
+
# Matches "(A) would", "[A] would", "{A} would", and similar formats
re.compile(rf'\(([{option_range}])\)[^{potential_letters}]*would'),
re.compile(rf'\[([{option_range}])\][^{potential_letters}]*would'),
@@ -484,7 +484,7 @@ def extract_answer(completion, option_range="a-eA-E"):
),
re.compile(rf'^([{option_range}])\)[^{potential_letters}]*would'),
re.compile(rf'^([{option_range}])[^{letter_and_num}][^{potential_letters}]*would'),
-
+
# Matches "(A) could", "[A] could", "{A} could", and similar formats
re.compile(rf'\(([{option_range}])\)[^{potential_letters}]*could'),
re.compile(rf'\[([{option_range}])\][^{potential_letters}]*could'),
@@ -496,7 +496,7 @@ def extract_answer(completion, option_range="a-eA-E"):
),
re.compile(rf'^([{option_range}])\)[^{potential_letters}]*could'),
re.compile(rf'^([{option_range}])[^{letter_and_num}][^{potential_letters}]*could'),
-
+
# Matches "(A) will", "[A] will", "{A} will", and similar formats
re.compile(rf'\(([{option_range}])\)[^{potential_letters}]*will'),
re.compile(rf'\[([{option_range}])\][^{potential_letters}]*will'),
@@ -508,7 +508,7 @@ def extract_answer(completion, option_range="a-eA-E"):
),
re.compile(rf'^([{option_range}])\)[^{potential_letters}]*will'),
re.compile(rf'^([{option_range}])[^{letter_and_num}][^{potential_letters}]*will'),
-
+
# Matches "option: (A)" and similar formats
re.compile(rf'[oO]ption:+[^{potential_letters}]*\(([{option_range}])\)'),
re.compile(rf'[oO]ption:+[^{potential_letters}]*\[([{option_range}])\]'),
@@ -531,7 +531,7 @@ def extract_answer(completion, option_range="a-eA-E"):
rf'{letter_and_num}]'
),
re.compile(rf'[oO]ption:+[^{potential_letters}]*[^{letter_and_num}]([{option_range}])$'),
-
+
# Matches "choice: (A)" and similar formats
re.compile(rf'[cC]hoice:+[^{potential_letters}]*\(([{option_range}])\)'),
re.compile(rf'[cC]hoice:+[^{potential_letters}]*\[([{option_range}])\]'),
@@ -554,7 +554,7 @@ def extract_answer(completion, option_range="a-eA-E"):
rf'{letter_and_num}]'
),
re.compile(rf'[cC]hoice:+[^{potential_letters}]*[^{letter_and_num}]([{option_range}])$'),
-
+
# Matches "answer: (A)" and similar formats
re.compile(rf' is[^{potential_letters}]+\(([{option_range}])\)[^{potential_letters}]'),
re.compile(rf' is[^{potential_letters}]+\[([{option_range}])\][^{potential_letters}]'),
@@ -580,7 +580,7 @@ def extract_answer(completion, option_range="a-eA-E"):
re.compile(rf' is[^{potential_letters}]+\{{([{option_range}])\}}'),
re.compile(rf' is[^{potential_letters}]*[^{letter_and_num}]([{option_range}])\)'),
re.compile(rf' is[^{letter_and_num}]*([{option_range}])\)'),
-
+
# Matches "choice (A)" and similar formats
re.compile(rf'[cC]hoice[^{potential_letters}]*\(([{option_range}])\)'),
re.compile(rf'[cC]hoice[^{potential_letters}]*\[([{option_range}])\]'),
@@ -603,7 +603,7 @@ def extract_answer(completion, option_range="a-eA-E"):
rf'{letter_and_num}]'
),
re.compile(rf'[cC]hoice[^{potential_letters}]*[^{letter_and_num}]([{option_range}])$'),
-
+
# Matches "answer (A)" and similar formats
re.compile(rf'[aA]nswer[^{potential_letters}]*\(([{option_range}])\)'),
re.compile(rf'[aA]nswer[^{potential_letters}]*\[([{option_range}])\]'),
@@ -625,7 +625,7 @@ def extract_answer(completion, option_range="a-eA-E"):
rf'{letter_and_num}]'
),
re.compile(rf'[aA]nswer[^{potential_letters}]*[^{letter_and_num}]([{option_range}])$'),
-
+
# Matches "option (A)" and similar formats
re.compile(rf'[Oo]ption[^{potential_letters}]*\(([{option_range}])\)'),
re.compile(rf'[Oo]ption[^{potential_letters}]*\[([{option_range}])\]'),
diff --git a/utils/eval_utils.py b/utils/eval_utils.py
index eb564d4..382a46c 100644
--- a/utils/eval_utils.py
+++ b/utils/eval_utils.py
@@ -178,18 +178,18 @@ def performance_eval(config, mode, prompts, answers, file_path):
sampling_params,
lora_request=LoRARequest("adapter", 1, lora_path)
)
-
+
for i, output in enumerate(completions):
temp_gen = output.outputs[0].text
responses.append(temp_gen)
print('Successfully finished generating', len(prompts), 'samples!')
# Evaluating the smaller models
- # Please take a look at the above quantization codes if you are using a quantized model.
+ # Please take a look at the above quantization code if you are using a quantized model.
elif mode == "small":
num_gpus_vllm = config.get("num_gpus_vllm")
gpu_utilization_vllm = config.get("gpu_utilization_vllm")
-
+
stop_tokens = stop_token_list()
# https://github.com/vllm-project/vllm/blob/main/vllm/sampling_params.py#L38-L66
sampling_params = SamplingParams(
diff --git a/utils/utils.py b/utils/utils.py
index 92d6bff..92c2f58 100644
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -210,7 +210,6 @@ def prompt_template_MMedLM(input=None, language="English"):
else:
question = input.split("\nA.")[0]
options = "\nA." + input.split("\nA.")[1]
-
options = options.replace(english_prompt, "")
options = options.replace(hindi_prompt, "")
options = options.replace(spanish_prompt, "")