forked from OpenRLHF/OpenRLHF
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: train_prm_mistral.sh
More file actions
executable file
·34 lines (31 loc) · 851 Bytes
/
train_prm_mistral.sh
File metadata and controls
executable file
·34 lines (31 loc) · 851 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#!/usr/bin/env bash
# Launch PRM (process reward model) training for Mistral-7B with OpenRLHF.
#
# Usage:
#   ./train_prm_mistral.sh          # launch locally via deepspeed
#   ./train_prm_mistral.sh slurm    # only build $training_commands; a slurm
#                                   # wrapper that sources this file does the launch
set -x

# NOTE: `read -d ''` always exits non-zero when it hits EOF, so it must not be
# the bare last status if `set -e` is ever enabled; guard it explicitly.
read -r -d '' training_commands <<EOF || true
openrlhf.cli.train_prm \
   --save_path ./checkpoint/mistral-7b-prm \
   --save_steps 500 \
   --logging_steps 1 \
   --eval_steps 100 \
   --train_batch_size 256 \
   --micro_train_batch_size 8 \
   --pretrain mistralai/Mistral-7B-v0.1 \
   --param_dtype bf16 \
   --max_epochs 1 \
   --max_len 8192 \
   --zero_stage 3 \
   --learning_rate 1e-6 \
   --dataset zhuzilin/Math-Shepherd \
   --input_key input \
   --label_key value \
   --attn_implementation flash_attention_2 \
   --load_checkpoint \
   --gradient_checkpointing \
   --packing_samples \
   --wandb_group prm \
   --placeholder_token ки \
   --reward_tokens + -
EOF
# --use_wandb [WANDB_TOKENS] or True (use wandb login command)
# --packing_samples

# Skip the local launch when invoked as "slurm"; "${1:-}" keeps this safe
# when no argument is given (and under a future `set -u`).
if [[ "${1:-}" != "slurm" ]]; then
    # $training_commands is intentionally unquoted: it must word-split
    # into separate deepspeed arguments.
    deepspeed --module $training_commands
fi