Commit 894d2a1 (1 parent: 9048324)

Showing 11 changed files with 291 additions and 7 deletions.
@@ -0,0 +1,32 @@
#!/bin/bash
#SBATCH --job-name=Odesia-qwen32B
#SBATCH --cpus-per-task=16
#SBATCH --nodes=1
#SBATCH --gres=gpu:8
#SBATCH --mem=64G
#SBATCH --output=.slurm/Odesia-qwen32B.out.txt
#SBATCH --error=.slurm/Odesia-qwen32B.err.txt

# Activate the training environment.
source /ikerlariak/igarcia945/envs/pytorch2/bin/activate

# Locale, tokenizer, threading, and Weights & Biases settings.
export LC_ALL=en_US.UTF-8
export LANG=en_US.UTF-8
export LANGUAGE=en_US.UTF-8
export TOKENIZERS_PARALLELISM=true
export TRANSFORMERS_NO_ADVISORY_WARNINGS=true
export WANDB_ENTITY=igarciaf
export WANDB_PROJECT=Odesia
export OMP_NUM_THREADS=16
export WANDB__SERVICE_WAIT=300

echo CUDA_VISIBLE_DEVICES "${CUDA_VISIBLE_DEVICES}"

# Train on 8 GPUs, then evaluate and run inference on the fine-tuned model.
export PYTHONPATH="$PYTHONPATH:$PWD"
accelerate launch --config_file train_configs/deepspeed_8.json src/train.py train_configs/qwen32B.yaml
torchrun --standalone --master_port 37227 --nproc_per_node=1 src/evaluate.py --tasks all --quantization --model_name models/Qwen2.5-32B-Instruct --output_dir results/finetune/Qwen2.5-32B-Instruct
torchrun --standalone --master_port 37227 --nproc_per_node=1 src/inference.py --tasks all --quantization --model_name models/Qwen2.5-32B-Instruct --output_dir results/finetune/Qwen2.5-32B-Instruct
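A minimal submission sketch, assuming this script is saved as train_qwen32B.sh (the file name is not visible in this diff). SLURM does not create the directory passed to --output/--error, so .slurm must exist before submission:

mkdir -p .slurm            # required for the --output/--error paths above
sbatch train_qwen32B.sh    # hypothetical file name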
@@ -0,0 +1,33 @@
#!/bin/bash
#SBATCH --job-name=Odesia-llama_LoRA
#SBATCH --cpus-per-task=16
#SBATCH --nodes=1
#SBATCH --gres=gpu:4
#SBATCH --mem=64G
#SBATCH --output=.slurm/Odesia-llama_LoRA.out.txt
#SBATCH --error=.slurm/Odesia-llama_LoRA.err.txt

# Activate the training environment.
source /ikerlariak/igarcia945/envs/pytorch2/bin/activate

# Locale, tokenizer, threading, and Weights & Biases settings.
export LC_ALL=en_US.UTF-8
export LANG=en_US.UTF-8
export LANGUAGE=en_US.UTF-8
export TOKENIZERS_PARALLELISM=true
export TRANSFORMERS_NO_ADVISORY_WARNINGS=true
export WANDB_ENTITY=igarciaf
export WANDB_PROJECT=Odesia
export OMP_NUM_THREADS=16
export WANDB__SERVICE_WAIT=300

echo CUDA_VISIBLE_DEVICES "${CUDA_VISIBLE_DEVICES}"

# Train, then evaluate and run inference. Note: the launch line points at the
# qwen_72B_LoRA.yaml config, while the evaluated model path is the
# Hermes-3-Llama-3.1-8B LoRA output.
export PYTHONPATH="$PYTHONPATH:$PWD"
accelerate launch --config_file train_configs/deepspeed.json src/train.py train_configs/qwen_72B_LoRA.yaml
torchrun --standalone --master_port 37227 --nproc_per_node=1 src/evaluate.py --tasks all --quantization --model_name models/Hermes-3-Llama-3.1-8B_LoRA --output_dir results/finetune/Hermes-3-Llama-3.1-8B_LoRA
torchrun --standalone --master_port 37227 --nproc_per_node=1 src/inference.py --tasks all --quantization --model_name models/Hermes-3-Llama-3.1-8B_LoRA --output_dir results/finetune/Hermes-3-Llama-3.1-8B_LoRA
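Once submitted, standard SLURM tooling can confirm the 4-GPU allocation and follow the logs; these commands are ordinary SLURM/coreutils usage, not part of this commit:

squeue -u "$USER"                          # check the job is queued or running
tail -f .slurm/Odesia-llama_LoRA.out.txt   # follow training output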
(Additional changed files in this commit are not expanded in this view.)
@@ -0,0 +1,21 @@
{
  "compute_environment": "LOCAL_MACHINE",
  "debug": false,
  "deepspeed_config": {
    "deepspeed_config_file": "train_configs/deepspeed_zero3.json",
    "zero3_init_flag": false
  },
  "distributed_type": "DEEPSPEED",
  "downcast_bf16": "no",
  "enable_cpu_affinity": false,
  "machine_rank": 0,
  "main_training_function": "main",
  "num_machines": 1,
  "num_processes": 8,
  "rdzv_backend": "static",
  "same_network": true,
  "tpu_env": [],
  "tpu_use_cluster": false,
  "tpu_use_sudo": false,
  "use_cpu": false
}
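This appears to be the Accelerate launcher config referenced as train_configs/deepspeed_8.json in the Qwen 32B script (the file name is not visible in this diff, so that pairing is an assumption): num_processes: 8 matches that script's --gres=gpu:8 request, and the actual ZeRO-3 settings live in the referenced deepspeed_zero3.json. A minimal sanity check, under the same file-name assumption:

grep num_processes train_configs/deepspeed_8.json   # should report 8
nvidia-smi --list-gpus | wc -l                      # should match on the allocated node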
(An additional changed file in this commit is not expanded in this view.)
@@ -0,0 +1,65 @@
# Training args
model_name_or_path: Qwen/Qwen2.5-14B-Instruct
torch_dtype: bfloat16
use_lora: false
quantization: null
gradient_checkpointing: true
force_auto_device_map: false
use_flash_attention: true
deepspeed: train_configs/deepspeed_zero3.json
use_liger_kernel: true

output_dir: models/Qwen2.5-14B-Instruct
overwrite_output_dir: true
load_best_model_at_end: false
metric_for_best_model: eval_loss
greater_is_better: false
save_strategy: "no"
save_only_model: true
save_total_limit: 1

# evaluation
do_train: true
do_eval: true
do_predict: false
evaluation_strategy: "epoch"

per_device_train_batch_size: 1
per_device_eval_batch_size: 1
gradient_accumulation_steps: 16

# optimizer settings
optim: adamw_torch
learning_rate: 0.000005
weight_decay: 0.0
num_train_epochs: 3
lr_scheduler_type: cosine
warmup_ratio: 0.1
adam_beta1: 0.9
adam_beta2: 0.95
adam_epsilon: 1e-12

# lora settings (unused here, since use_lora is false)
lora_r: 128
lora_alpha: 256
lora_dropout: 0.05
lora_target_modules:
  - all

# reporting
logging_strategy: steps
logging_first_step: true
logging_steps: 5
report_to: wandb
run_name: "Qwen2.5-14B-Instruct"
disable_tqdm: false

# hub settings
push_to_hub: false
resume_from_checkpoint: false

# performance
bf16: true
fp16: false
torch_compile: false
ddp_find_unused_parameters: false
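A quick worked check of the effective global batch size this config implies, assuming it is launched with the 8-process Accelerate config above (that pairing is an assumption; only the 32B script's launch line is visible in this diff):

# per_device_train_batch_size * num_processes * gradient_accumulation_steps
echo $((1 * 8 * 16))   # -> 128 sequences per optimizer step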
@@ -0,0 +1,65 @@
# Training args
model_name_or_path: Qwen/Qwen2.5-32B-Instruct
torch_dtype: bfloat16
use_lora: false
quantization: null
gradient_checkpointing: true
force_auto_device_map: false
use_flash_attention: true
deepspeed: train_configs/deepspeed_zero3.json
use_liger_kernel: true

output_dir: models/Qwen2.5-32B-Instruct
overwrite_output_dir: true
load_best_model_at_end: false
metric_for_best_model: eval_loss
greater_is_better: false
save_strategy: "no"
save_only_model: true
save_total_limit: 1

# evaluation
do_train: true
do_eval: true
do_predict: false
evaluation_strategy: "epoch"

per_device_train_batch_size: 2
per_device_eval_batch_size: 1
gradient_accumulation_steps: 8

# optimizer settings
optim: adamw_torch
learning_rate: 0.000005
weight_decay: 0.0
num_train_epochs: 3
lr_scheduler_type: cosine
warmup_ratio: 0.1
adam_beta1: 0.9
adam_beta2: 0.95
adam_epsilon: 1e-12

# lora settings (unused here, since use_lora is false)
lora_r: 128
lora_alpha: 256
lora_dropout: 0.05
lora_target_modules:
  - all

# reporting
logging_strategy: steps
logging_first_step: true
logging_steps: 5
report_to: wandb
run_name: "Qwen2.5-32B-Instruct"
disable_tqdm: false

# hub settings
push_to_hub: false
resume_from_checkpoint: false

# performance
bf16: true
fp16: false
torch_compile: false
ddp_find_unused_parameters: false
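The 32B config doubles the per-device batch and halves the accumulation relative to the 14B run, so the implied effective batch size is unchanged under the same 8-process launch assumption:

echo $((2 * 8 * 8))   # -> 128 sequences per optimizer step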
@@ -0,0 +1,62 @@
# Training args
model_name_or_path: Qwen/Qwen2.5-72B
torch_dtype: bfloat16
use_lora: true
quantization: 4  # 4-bit quantized base model plus LoRA adapters (QLoRA-style)
gradient_checkpointing: true
force_auto_device_map: false
use_flash_attention: true
deepspeed: train_configs/deepspeed_zero3.json

output_dir: models/Qwen2.5-72B_LoRA
overwrite_output_dir: true
load_best_model_at_end: false
metric_for_best_model: eval_loss
greater_is_better: false
save_strategy: "no"
save_only_model: true
save_total_limit: 1

# evaluation
do_train: true
do_eval: true
do_predict: false
evaluation_strategy: "epoch"

per_device_train_batch_size: 2
per_device_eval_batch_size: 2
gradient_accumulation_steps: 8

# optimizer settings
optim: adamw_torch
learning_rate: 0.0003
weight_decay: 0.001
num_train_epochs: 3
lr_scheduler_type: cosine
warmup_ratio: 0.1
adam_epsilon: 0.0000001

# lora settings
lora_r: 128
lora_alpha: 256
lora_dropout: 0.05
lora_target_modules:
  - all

# reporting
logging_strategy: steps
logging_first_step: true
logging_steps: 5
report_to: wandb
run_name: "Qwen2.5-72B_LoRA"
disable_tqdm: false

# hub settings
push_to_hub: false
resume_from_checkpoint: false

# performance
bf16: true
fp16: false
torch_compile: false
ddp_find_unused_parameters: false
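A minimal sketch of evaluating the resulting adapter, assuming src/inference.py accepts the LoRA output directory directly, the same way the Llama LoRA script above passes models/Hermes-3-Llama-3.1-8B_LoRA; the flags mirror that script and are otherwise an assumption:

torchrun --standalone --master_port 37227 --nproc_per_node=1 src/inference.py --tasks all --quantization --model_name models/Qwen2.5-72B_LoRA --output_dir results/finetune/Qwen2.5-72B_LoRA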