diff --git a/.gitignore b/.gitignore index 5f514f7..00ae19d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,5 @@ *.ipynb -*.parquet \ No newline at end of file +*.parquet +dataset/ +models/ +local_util/ \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..c282e9a --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,14 @@ +# Contributing + +This project welcomes contributions and suggestions. Most contributions require you to +agree to a Contributor License Agreement (CLA) declaring that you have the right to, +and actually do, grant us the rights to use your contribution. For details, visit +https://cla.microsoft.com. + +When you submit a pull request, a CLA-bot will automatically determine whether you need +to provide a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the +instructions provided by the bot. You will only need to do this once across all repositories using our CLA. + +This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). +For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) +or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. \ No newline at end of file diff --git a/README.md b/README.md index 086a60f..f80411b 100644 --- a/README.md +++ b/README.md @@ -112,3 +112,15 @@ trademarks or logos is subject to and must follow [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. Any use of third-party trademarks or logos are subject to those third-party's policies. 
+ +## Citation + +```bibtex +@inproceedings{aggarwal2025nextcoder, +author = {Aggarwal, Tushar and Singh, Swayam and Awasthi, Abhijeet and Kanade, Aditya and Natarajan, Nagarajan}, +title = {NextCoder: Robust Adaptation of Code LMs to Diverse Code Edits}, +booktitle = {International Conference on Machine Learning}, +year = {2025}, +url = {https://www.microsoft.com/en-us/research/publication/nextcoder-robust-adaptation-of-code-lms-to-diverse-code-edits/}, +} +``` \ No newline at end of file diff --git a/src/train/README.md b/src/train/README.md index 64b81b0..cf0b489 100644 --- a/src/train/README.md +++ b/src/train/README.md @@ -1,12 +1,10 @@ # Model Training scripts ## Folder Structure -- `ds_config.json` contains the deepspeed configuration -- `general_acc.yaml` contains the accelerate configuration (might need to be modified as per desired system) -- `lora.py` contains the code for training model with LoRA -- `merge_lora.py` contains the code for merging trained LoRA adapters back to model for inference -- `seletkt.py` contains the code for training model with our algorithm explained in our paper -- `sft.py` contains the code for training model with Full Supervised Finetuning +- `configs` contains the deepspeed and accelerate configurations (modifiable as per the system) +- `lora` contains the code for training model with LoRA +- `selekt` contains the code for training model with SeleKT algorithm explained in our paper +- `sft` contains the code for training model with Full Supervised Finetuning ## Usgae ### Preparing the dataset @@ -23,122 +21,23 @@ ### Training with SFT - modify or replace the `general_acc.yaml` file as per the desired system configuration - set the `zero_optimization-stage` to `3` and `overlap_comm` to `false` in `ds_config` for better memory optimizations -- Run the following command to start training - ```bash - deepspeed sft.py \ - --model_name_or_path "path to pretrained LLM" \ - --train_data_path "path to training data" \ - --output_dir 
"path to output dir" \ - --num_train_epochs 3 \ - --model_max_length 8192 \ - --per_device_train_batch_size 4 \ - --gradient_accumulation_steps 4 \ - --save_strategy "epoch" \ - --save_steps 760 \ - --save_total_limit 25 \ - --learning_rate 1e-5 \ - --warmup_ratio 0.1 \ - --logging_steps 5 \ - --report_to "wandb" \ - --gradient_checkpointing True \ - --deepspeed ds_config.json \ - --bf16 True \ - --run_name "Run name for logs" \ - --debug True \ - ``` - Update the above command as per the model -- To train on conversation data by only applying loss on the response, uncomment the lines 175, 176 and 185 and run the same command with proper conversational dataset path - ```python - response_template = "#RESPONSE\n" - collator = DataCollatorForCompletionOnlyLM(response_template=response_template, tokenizer=tokenizer) - - # Initialize trainer - trainer = SFTTrainer( - model=model, - processing_class=tokenizer, - train_dataset=dataset, - args=training_config, - callbacks=[Callback(flush_steps=1)], - data_collator=collator, # pass the collator in the trainer - ) - ``` +- Add the respective variables like `MODEL_PATH`, `TRAIN_DATA`, `OUTPUT_DIR` etc. 
in the `run.sh` script and run +```bash +bash ./sft/run.sh +``` ### Training with LoRA - modify or replace the `general_acc.yaml` file as per the desired system configuration -- set the `zero_optimization-stage` to `2` and `overlap_comm` to `false` in `ds_config` for better memory optimizations -- Run the following command to start training - ```bash - deepspeed lora.py \ - --model_name_or_path "path to pretrained LLM" \ - --train_data_path "path to training data" \ - --output_dir "path to output dir" \ - --num_train_epochs 3 \ - --model_max_length 8192 \ - --per_device_train_batch_size 4 \ - --gradient_accumulation_steps 4 \ - --save_strategy "epoch" \ - --save_steps 760 \ - --save_total_limit 25 \ - --learning_rate 1e-5 \ - --warmup_ratio 0.1 \ - --logging_steps 5 \ - --report_to "wandb" \ - --gradient_checkpointing True \ - --deepspeed ds_config.json \ - --bf16 True \ - --run_name "Run name for logs" \ - --debug True \ - ``` - Update the above command as per the model -- Put the path of output LoRA adapters inside `merge_lora.py` and run following to get the final checkpoints - ```bash - python merge_lora.py - ``` +- set the `zero_optimization-stage` to `2` and `overlap_comm` to `false` in `ds_config` +- Add the respective variables like `MODEL_PATH`, `TRAIN_DATA`, `OUTPUT_DIR` etc. in the `run.sh` script and run +```bash +bash ./lora/run.sh +``` +>`lora/lora.py` uses `use_reentrant: True` for gradient checkpointing, and this can allow using deepspeed zero-3 optimization for large models. 
### Training with SeleKT - modify or replace the `general_acc.yaml` file as per the desired system configuration -- set the `zero_optimization-stage` to `2` and `overlap_comm` to `false` in `ds_config` for better memory optimizations -- Run the following command to start training - ```bash - accelerate launch \ - --config_file=general_acc.yaml \ - selekt.py \ - --model_name_or_path "path to pretrained LLM" \ - --base_model_path "path to pretrained LLM" \ - --train_data_path "path to training data" \ - --output_dir "path to output directory" \ - --num_train_epochs 3 \ - --model_max_length 8192 \ - --per_device_train_batch_size 4 \ - --gradient_accumulation_steps 4 \ - --save_strategy "steps" \ - --save_steps "Enter the periodicity value M for seleKT" \ - --save_total_limit 50 \ - --learning_rate 1e-5 \ - --warmup_ratio 0.1 \ - --logging_steps 5 \ - --report_to "wandb" \ - --gradient_checkpointing True \ - --deepspeed ds_config.json \ - --bf16 True \ - --run_name "Name for logs" \ - --debug True \ - --alpha "Enter value for desired alpha parameter for SeleKT" \ - ``` - Update the above command as per the model -- To train on conversation data by only applying loss on the response, uncomment the lines 291, 292 and 301 and run the same command with proper conversational dataset path - ```python - ```python - response_template = "#RESPONSE\n" - collator = DataCollatorForCompletionOnlyLM(response_template=response_template, tokenizer=tokenizer) - - # Initialize trainer - trainer = SFTTrainer( - model=model, - processing_class=tokenizer, - train_dataset=dataset, - args=training_config, - callbacks=[Callback(flush_steps=1)], - data_collator=collator, # pass the collator in the trainer - ) - ``` \ No newline at end of file +- set the `zero_optimization-stage` to `3` and `overlap_comm` to `false` in `ds_config` for better memory optimizations +- Add the respective variables like `MODEL_PATH`, `TRAIN_DATA`, `OUTPUT_DIR` etc. 
in the `run.sh` script and run +```bash +bash ./selekt/run.sh \ No newline at end of file diff --git a/src/train/SeleKT/run.sh b/src/train/SeleKT/run.sh new file mode 100644 index 0000000..5faa5a5 --- /dev/null +++ b/src/train/SeleKT/run.sh @@ -0,0 +1,112 @@ +#!/bin/bash + +export MODEL_NAME="" +export DESC="" + +# Stage 1: Instruction Training +OUTPUT_DIR_STAGE1="./output/selekt_stage1_instruction" +TRAIN_DATA_STAGE1="" +MODEL_PATH="" + +# Stage 2: Conversational Training +OUTPUT_DIR_STAGE2="./output/selekt_stage2_conversational" +TRAIN_DATA_STAGE2="" + +find_latest_checkpoint() { + local output_dir=$1 + local latest_checkpoint=$(find "$output_dir" -name "checkpoint-*" -type d | sort -V | tail -1) + echo "$latest_checkpoint" +} + +echo "Starting Stage 1: SeleKT Instruction Training..." +echo "Model: $MODEL_PATH" +echo "Training data: $TRAIN_DATA_STAGE1" +echo "Output directory: $OUTPUT_DIR_STAGE1" + +mkdir -p $OUTPUT_DIR_STAGE1 + +# Stage 1: Instruction Training +accelerate launch \ + --config_file=../configs/general_acc.yaml \ + selekt.py \ + --model_name_or_path "$MODEL_PATH" \ + --train_data_path "$TRAIN_DATA_STAGE1" \ + --output_dir ${OUTPUT_DIR_STAGE1} \ + --num_train_epochs 3 \ + --model_max_length 16384 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 4 \ + --save_strategy "epoch" \ + --save_steps 760 \ + --save_total_limit 25 \ + --learning_rate 1e-5 \ + --warmup_ratio 0.1 \ + --weight_decay 0.1 \ + --logging_steps 5 \ + --lr_scheduler_type "cosine" \ + --report_to "wandb" \ + --gradient_checkpointing True \ + --deepspeed ../configs/ds_config.json \ + --bf16 True \ + --run_name "${MODEL_NAME}_stage1_instruction" \ + --alpha 0.05 \ + +if [ $? -ne 0 ]; then + echo "Error: Stage 1 training failed!" + exit 1 +fi + +echo "Stage 1 completed successfully!" 
+ +LATEST_CHECKPOINT=$(find_latest_checkpoint "$OUTPUT_DIR_STAGE1") + +if [ -z "$LATEST_CHECKPOINT" ]; then + echo "Error: No checkpoint found in $OUTPUT_DIR_STAGE1" + exit 1 +fi + +echo "Found latest checkpoint: $LATEST_CHECKPOINT" +echo "Starting Stage 2: SeleKT Conversational Training..." +echo "Model: $LATEST_CHECKPOINT" +echo "Training data: $TRAIN_DATA_STAGE2" +echo "Output directory: $OUTPUT_DIR_STAGE2" + +mkdir -p $OUTPUT_DIR_STAGE2 + +# Stage 2: Conversational Training +accelerate launch \ + --config_file=../configs/general_acc.yaml \ + selekt.py \ + --model_name_or_path "${LATEST_CHECKPOINT}" \ + --train_data_path "$TRAIN_DATA_STAGE2" \ + --output_dir ${OUTPUT_DIR_STAGE2} \ + --num_train_epochs 3 \ + --model_max_length 16384 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 4 \ + --save_strategy "epoch" \ + --save_steps 760 \ + --save_total_limit 25 \ + --learning_rate 1e-5 \ + --warmup_ratio 0.1 \ + --weight_decay 0.1 \ + --logging_steps 5 \ + --lr_scheduler_type "cosine" \ + --report_to "wandb" \ + --gradient_checkpointing True \ + --deepspeed ../configs/ds_config.json \ + --bf16 True \ + --run_name "${MODEL_NAME}_stage2_conversational" \ + --alpha 0.05 \ + --is_conversational_training \ + + +# Check if stage 2 completed successfully +if [ $? -ne 0 ]; then + echo "Error: Stage 2 training failed!" + exit 1 +fi + +echo "Stage 2 training completed!" +echo "Both training stages completed successfully!" 
+echo "Final model saved in: $OUTPUT_DIR_STAGE2" \ No newline at end of file diff --git a/src/train/selekt.py b/src/train/SeleKT/selekt.py similarity index 96% rename from src/train/selekt.py rename to src/train/SeleKT/selekt.py index b7c987b..45a16c4 100644 --- a/src/train/selekt.py +++ b/src/train/SeleKT/selekt.py @@ -70,10 +70,11 @@ def parse_args(): help="Whether to use bf16 mixed precision training") parser.add_argument("--run_name", type=str, default=None) parser.add_argument("--use_liger", type=bool, default=False) - parser.add_argument("--debug", type=bool, default=False) parser.add_argument("--packing", type=bool, default=True, help="Whether to use packing for training") - parser.add_argument("--alpha", type=float, default=0.05,) + parser.add_argument("--alpha", type=float, default=0.05, help="Alpha value for SeleKT") + parser.add_argument("--is_conversational_training", action='store_true', + help="Whether to use conversational training format") args, _ = parser.parse_known_args() return args @@ -300,8 +301,10 @@ def train(args): print(f'Resuming from checkpoint: {last_checkpoint}') - # response_template = "#RESPONSE\n" - # collator = DataCollatorForCompletionOnlyLM(response_template=response_template, tokenizer=tokenizer) + collator = None + if args.is_conversational_training: + response_template = "#RESPONSE\n" + collator = DataCollatorForCompletionOnlyLM(response_template=response_template, tokenizer=tokenizer) callback = Callback(base_model_path=args.base_model_path, flush_steps=1, alpha=args.alpha) trainer = SFTTrainer( @@ -310,7 +313,7 @@ def train(args): train_dataset=dataset, args=training_config, callbacks=[callback], - # data_collator=collator, + data_collator=collator, ) callback.set_trainer(trainer) print(f"Starting training for epoch {args.num_train_epochs}") diff --git a/src/train/ds_config.json b/src/train/configs/ds_config.json similarity index 100% rename from src/train/ds_config.json rename to src/train/configs/ds_config.json diff --git 
a/src/train/general_acc.yaml b/src/train/configs/general_acc.yaml similarity index 100% rename from src/train/general_acc.yaml rename to src/train/configs/general_acc.yaml diff --git a/src/train/lora.py b/src/train/lora/lora.py similarity index 94% rename from src/train/lora.py rename to src/train/lora/lora.py index 01fcd6a..6385456 100644 --- a/src/train/lora.py +++ b/src/train/lora/lora.py @@ -66,9 +66,10 @@ def parse_args(): help="Whether to use bf16 mixed precision training") parser.add_argument("--run_name", type=str, default=None) parser.add_argument("--use_liger", type=bool, default=False) - parser.add_argument("--debug", type=bool, default=False) parser.add_argument("--packing", type=bool, default=True, help="Whether to use packing for training") + parser.add_argument("--is_conversational_training", action='store_true', + help="Whether to use conversational training format") args, _ = parser.parse_known_args() return args @@ -151,12 +152,13 @@ def main(): output_dir=args.output_dir, report_to="none", gradient_checkpointing=args.gradient_checkpointing, - gradient_checkpointing_kwargs={"use_reentrant": False}, + gradient_checkpointing_kwargs={"use_reentrant": True}, deepspeed=args.deepspeed, dataset_num_proc=80, run_name=args.run_name, use_liger=args.use_liger, ) + lora_config = LoraConfig( r=64, # target_modules= ['q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj'], @@ -175,6 +177,11 @@ def main(): dataset = setup_training_data(args, local_rank, tokenizer) + collator = None + if args.is_conversational_training: + response_template = "#RESPONSE\n" + collator = DataCollatorForCompletionOnlyLM(response_template=response_template, tokenizer=tokenizer) + trainer = SFTTrainer( model=model, processing_class=tokenizer, @@ -182,6 +189,7 @@ def main(): args=training_config, peft_config=lora_config, callbacks=[Callback(flush_steps=1)], + data_collator=collator ) print("Starting LoRA training...") diff --git a/src/train/lora/merge_lora.py 
b/src/train/lora/merge_lora.py new file mode 100644 index 0000000..de4927b --- /dev/null +++ b/src/train/lora/merge_lora.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python3 + +import argparse +import torch +from peft import AutoPeftModelForCausalLM +from transformers import AutoTokenizer +import os + +def parse_args(): + parser = argparse.ArgumentParser(description="Merge LoRA weights with base model") + parser.add_argument("--lora_checkpoint", type=str, required=True, + help="Path to the LoRA checkpoint directory") + parser.add_argument("--output_dir", type=str, required=True, + help="Directory to save the merged model") + parser.add_argument("--max_shard_size", type=str, default="5GB", + help="Maximum size of each shard when saving") + parser.add_argument("--safe_serialization", action="store_true", default=True, + help="Use safe serialization format") + return parser.parse_args() + +def merge_lora_weights(lora_checkpoint, output_dir, max_shard_size="5GB", safe_serialization=True): + """ + Merge LoRA adapter weights with the base model + """ + print(f"Loading LoRA model from: {lora_checkpoint}") + + peft_model = AutoPeftModelForCausalLM.from_pretrained( + lora_checkpoint, + torch_dtype=torch.bfloat16, + device_map="auto" + ) + + print(f"Loading tokenizer from: {lora_checkpoint}") + tokenizer = AutoTokenizer.from_pretrained(lora_checkpoint) + + print("Merging LoRA adapters with base model...") + merged_model = peft_model.merge_and_unload() + + print(f"Saving merged model to: {output_dir}") + os.makedirs(output_dir, exist_ok=True) + + merged_model.save_pretrained( + output_dir, + max_shard_size=max_shard_size, + safe_serialization=safe_serialization + ) + + # Save the tokenizer + tokenizer.save_pretrained(output_dir) + + print(f"✅ Successfully merged and saved model to: {output_dir}") + + del peft_model, merged_model + torch.cuda.empty_cache() + + return output_dir + +def main(): + args = parse_args() + + try: + merge_lora_weights( + lora_checkpoint=args.lora_checkpoint, + 
output_dir=args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization + ) + except Exception as e: + print(f"❌ Error during merging: {str(e)}") + raise e + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/train/lora/run.sh b/src/train/lora/run.sh new file mode 100644 index 0000000..2607b44 --- /dev/null +++ b/src/train/lora/run.sh @@ -0,0 +1,154 @@ +#!/bin/bash + +export MODEL_NAME="" +export DESC="" + +# Stage 1: Instruction Training +OUTPUT_DIR_STAGE1="./output/stage1_instruction_lora" +TRAIN_DATA_STAGE1="" +MODEL_PATH="" + +# Stage 2: Conversational Training +OUTPUT_DIR_STAGE2="./output/stage2_conversational_lora" +TRAIN_DATA_STAGE2="" + +# Merged model directory +MERGED_MODEL_DIR="./output/stage1_merged" + +find_latest_checkpoint() { + local output_dir=$1 + local latest_checkpoint=$(find "$output_dir" -name "checkpoint-*" -type d | sort -V | tail -1) + echo "$latest_checkpoint" +} + +merge_lora_weights() { + local lora_checkpoint=$1 + local output_dir=$2 + + echo "Merging LoRA weights..." + echo "LoRA checkpoint: $lora_checkpoint" + echo "Output: $output_dir" + + python3 merge_lora.py \ + --lora_checkpoint "$lora_checkpoint" \ + --output_dir "$output_dir" \ + --safe_serialization + + return $? +} + +echo "Starting Stage 1: Instruction Training (LoRA)..." 
+echo "Model: $MODEL_PATH" +echo "Training data: $TRAIN_DATA_STAGE1" +echo "Output directory: $OUTPUT_DIR_STAGE1" + +mkdir -p $OUTPUT_DIR_STAGE1 + +# Stage 1: LoRA Instruction Training +accelerate launch \ + --config_file=../configs/general_acc.yaml \ + lora.py \ + --model_name_or_path "$MODEL_PATH" \ + --train_data_path "$TRAIN_DATA_STAGE1" \ + --output_dir ${OUTPUT_DIR_STAGE1} \ + --num_train_epochs 3 \ + --model_max_length 16384 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 4 \ + --save_strategy "epoch" \ + --save_steps 760 \ + --save_total_limit 25 \ + --learning_rate 1e-5 \ + --warmup_ratio 0.1 \ + --weight_decay 0.1 \ + --logging_steps 5 \ + --lr_scheduler_type "cosine" \ + --report_to "wandb" \ + --gradient_checkpointing True \ + --deepspeed ../configs/ds_config.json \ + --bf16 True \ + --run_name "${MODEL_NAME}_stage1_instruction_lora" \ + +if [ $? -ne 0 ]; then + echo "Error: Stage 1 training failed!" + exit 1 +fi + +echo "Stage 1 completed successfully!" + +# Find latest checkpoint +LATEST_CHECKPOINT=$(find_latest_checkpoint "$OUTPUT_DIR_STAGE1") + +if [ -z "$LATEST_CHECKPOINT" ]; then + echo "Error: No checkpoint found in $OUTPUT_DIR_STAGE1" + exit 1 +fi + +echo "Found latest checkpoint: $LATEST_CHECKPOINT" + +# Merge LoRA weights with base model +mkdir -p $MERGED_MODEL_DIR +merge_lora_weights "$LATEST_CHECKPOINT" "$MERGED_MODEL_DIR" + +if [ $? -ne 0 ]; then + echo "Error: LoRA merging failed!" + exit 1 +fi + +echo "LoRA weights merged successfully!" +echo "Starting Stage 2: Conversational Training (LoRA)..." 
+echo "Model: $MERGED_MODEL_DIR" +echo "Training data: $TRAIN_DATA_STAGE2" +echo "Output directory: $OUTPUT_DIR_STAGE2" + +mkdir -p $OUTPUT_DIR_STAGE2 + +# Stage 2: LoRA Conversational Training +accelerate launch \ + --config_file=../configs/general_acc.yaml \ + lora.py \ + --model_name_or_path "${MERGED_MODEL_DIR}" \ + --train_data_path "$TRAIN_DATA_STAGE2" \ + --output_dir ${OUTPUT_DIR_STAGE2} \ + --num_train_epochs 3 \ + --model_max_length 16384 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 4 \ + --save_strategy "epoch" \ + --save_steps 760 \ + --save_total_limit 25 \ + --learning_rate 1e-5 \ + --warmup_ratio 0.1 \ + --weight_decay 0.1 \ + --logging_steps 5 \ + --lr_scheduler_type "cosine" \ + --report_to "wandb" \ + --gradient_checkpointing True \ + --deepspeed ../configs/ds_config.json \ + --bf16 True \ + --run_name "${MODEL_NAME}_stage2_conversational_lora" \ + --is_conversational_training \ + +if [ $? -ne 0 ]; then + echo "Error: Stage 2 training failed!" + exit 1 +fi + +echo "Stage 2 training completed successfully!" + +# Find final checkpoint and merge again +FINAL_CHECKPOINT=$(find_latest_checkpoint "$OUTPUT_DIR_STAGE2") +FINAL_MERGED_DIR="./output/final_merged_model" + +if [ ! -z "$FINAL_CHECKPOINT" ]; then + echo "Merging final LoRA weights..." + mkdir -p $FINAL_MERGED_DIR + merge_lora_weights "$FINAL_CHECKPOINT" "$FINAL_MERGED_DIR" + echo "Final merged model saved in: $FINAL_MERGED_DIR" +else + echo "Warning: No final checkpoint found, using stage 2 output directory" +fi + +echo "Both training stages completed successfully!" 
+echo "LoRA adapters saved in: $OUTPUT_DIR_STAGE2" +echo "Final merged model saved in: $FINAL_MERGED_DIR" \ No newline at end of file diff --git a/src/train/merge_lora.py b/src/train/merge_lora.py deleted file mode 100644 index f2c16dd..0000000 --- a/src/train/merge_lora.py +++ /dev/null @@ -1,16 +0,0 @@ -from peft import AutoPeftModelForCausalLM -from transformers import AutoTokenizer - -checkpoints = [] # add the paths to the checkpoints here - - -for lora_checkpoint in checkpoints[1:]: - peft_model = AutoPeftModelForCausalLM.from_pretrained(lora_checkpoint) - tokenizer = AutoTokenizer.from_pretrained(lora_checkpoint) - - merged_model = peft_model.merge_and_unload() - print(type(merged_model)) - output_path = lora_checkpoint + "-merged" - merged_model.save_pretrained(output_path) - tokenizer.save_pretrained(output_path) - print(f"Model saved at {output_path}") diff --git a/src/train/sft/run.sh b/src/train/sft/run.sh new file mode 100644 index 0000000..e534fe0 --- /dev/null +++ b/src/train/sft/run.sh @@ -0,0 +1,110 @@ +#!/bin/bash + +export MODEL_NAME="" +export DESC="" + +# Stage 1: Instruction Training +OUTPUT_DIR_STAGE1="./output/sft_stage1_instruction" +TRAIN_DATA_STAGE1="" +MODEL_PATH="" + +# Stage 2: Conversational Training +OUTPUT_DIR_STAGE2="./output/sft_stage2_conversational" +TRAIN_DATA_STAGE2="" + +find_latest_checkpoint() { + local output_dir=$1 + local latest_checkpoint=$(find "$output_dir" -name "checkpoint-*" -type d | sort -V | tail -1) + echo "$latest_checkpoint" +} + +echo "Starting Stage 1: Instruction Training..." 
+echo "Model: $MODEL_PATH" +echo "Training data: $TRAIN_DATA_STAGE1" +echo "Output directory: $OUTPUT_DIR_STAGE1" + +mkdir -p $OUTPUT_DIR_STAGE1 + +# Stage 1: Instruction Training +accelerate launch \ + --config_file=../configs/general_acc.yaml \ + sft.py \ + --model_name_or_path "$MODEL_PATH" \ + --train_data_path "$TRAIN_DATA_STAGE1" \ + --output_dir ${OUTPUT_DIR_STAGE1} \ + --num_train_epochs 3 \ + --model_max_length 16384 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 4 \ + --save_strategy "epoch" \ + --save_steps 760 \ + --save_total_limit 25 \ + --learning_rate 1e-5 \ + --warmup_ratio 0.1 \ + --weight_decay 0.1 \ + --logging_steps 5 \ + --lr_scheduler_type "cosine" \ + --report_to "wandb" \ + --gradient_checkpointing True \ + --deepspeed ../configs/ds_config.json \ + --bf16 True \ + --run_name "${MODEL_NAME}_stage1_instruction" \ + +if [ $? -ne 0 ]; then + echo "Error: Stage 1 training failed!" + exit 1 +fi + +echo "Stage 1 completed successfully!" + +LATEST_CHECKPOINT=$(find_latest_checkpoint "$OUTPUT_DIR_STAGE1") + +if [ -z "$LATEST_CHECKPOINT" ]; then + echo "Error: No checkpoint found in $OUTPUT_DIR_STAGE1" + exit 1 +fi + +echo "Found latest checkpoint: $LATEST_CHECKPOINT" +echo "Starting Stage 2: Conversational Training..." 
+echo "Model: $LATEST_CHECKPOINT" +echo "Training data: $TRAIN_DATA_STAGE2" +echo "Output directory: $OUTPUT_DIR_STAGE2" + +mkdir -p $OUTPUT_DIR_STAGE2 + +# Stage 2: Conversational Training +accelerate launch \ + --config_file=../configs/general_acc.yaml \ + sft.py \ + --model_name_or_path "${LATEST_CHECKPOINT}" \ + --train_data_path "$TRAIN_DATA_STAGE2" \ + --output_dir ${OUTPUT_DIR_STAGE2} \ + --num_train_epochs 3 \ + --model_max_length 16384 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 4 \ + --save_strategy "epoch" \ + --save_steps 760 \ + --save_total_limit 25 \ + --learning_rate 1e-5 \ + --warmup_ratio 0.1 \ + --weight_decay 0.1 \ + --logging_steps 5 \ + --lr_scheduler_type "cosine" \ + --report_to "wandb" \ + --gradient_checkpointing True \ + --deepspeed ../configs/ds_config.json \ + --bf16 True \ + --run_name "${MODEL_NAME}_stage2_conversational" \ + --is_conversational_training \ + + +# Check if stage 2 completed successfully +if [ $? -ne 0 ]; then + echo "Error: Stage 2 training failed!" + exit 1 +fi + +echo "Stage 2 training completed!" +echo "Both training stages completed successfully!" 
+echo "Final model saved in: $OUTPUT_DIR_STAGE2" \ No newline at end of file diff --git a/src/train/sft.py b/src/train/sft/sft.py similarity index 95% rename from src/train/sft.py rename to src/train/sft/sft.py index ebdd7e0..08a4abd 100644 --- a/src/train/sft.py +++ b/src/train/sft/sft.py @@ -62,9 +62,10 @@ def parse_args(): help="Whether to use bf16 mixed precision training") parser.add_argument("--run_name", type=str, default=None) parser.add_argument("--use_liger", type=bool, default=False) - parser.add_argument("--debug", type=bool, default=False) parser.add_argument("--packing", type=bool, default=True, help="Whether to use packing for training") + parser.add_argument("--is_conversational_training", action='store_true', + help="Whether to use conversational training format") args, _ = parser.parse_known_args() return args @@ -108,7 +109,6 @@ def __init__(self, flush_steps=None): self.flush_steps = flush_steps def on_step_end(self, args, state, control, model, processing_class , **kwargs): - # import sys; sys.exit(0) if state.global_step % self.flush_steps == 0: get_accelerator().empty_cache() if dist.is_initialized(): @@ -172,8 +172,10 @@ def main(): if last_checkpoint: print(f'Resuming from checkpoint: {last_checkpoint}') - # response_template = "#RESPONSE\n" - # collator = DataCollatorForCompletionOnlyLM(response_template=response_template, tokenizer=tokenizer) + collator = None + if args.is_conversational_training: + response_template = "#RESPONSE\n" + collator = DataCollatorForCompletionOnlyLM(response_template=response_template, tokenizer=tokenizer) # Initialize trainer trainer = SFTTrainer( @@ -182,7 +184,7 @@ def main(): train_dataset=dataset, args=training_config, callbacks=[Callback(flush_steps=1)], - # data_collator=collator, + data_collator=collator, ) # Start training