#!/usr/bin/env bash
# run_ppo.sh
# Runs four run_ppo.py training jobs one after another:
# QReCC + CoQA, CANARD + CoQA, QReCC + QuAC, CANARD + QuAC.
#######################################
# Launch a single run_ppo.py training run with CUDA_VISIBLE_DEVICES=0.
#
# Only three things differ between the runs in this script; everything
# else is a shared hyperparameter that lives here exactly once so the
# runs cannot drift apart.
#
# Arguments:
#   $1 - PPO checkpoint suffix (qrecc or canard). save/gpt2-$1 is passed
#        as both --ppo_tokenizer_name and --ppo_model_name_or_path, and
#        the run is named gpt2-$1-$2 via --exp.
#   $2 - QA dataset key (coqa or quac). Selects the dataset script
#        src/data_utils/$2.py, the config "$2_ppo", and save/roberta-base-$2
#        for both --qa_tokenizer_name and --qa_model_name_or_path.
#   $3 - batch size, passed as both --batch_size and
#        --per_device_eval_batch_size.
# Outputs:
#   A usage message on stderr when called with the wrong number of
#   arguments.
# Returns:
#   2 on wrong usage, otherwise the exit status of the python command.
#######################################
run_ppo_experiment() {
  if [ "$#" -ne 3 ]; then
    printf 'usage: run_ppo_experiment <qrecc|canard> <coqa|quac> <batch_size>\n' >&2
    return 2
  fi

  local rewriter="$1"
  local qa_dataset="$2"
  local batch_size="$3"

  # Flag order mirrors the original hand-written commands one-to-one.
  CUDA_VISIBLE_DEVICES=0 python run_ppo.py \
    --do_train \
    --epochs 6 \
    --ppo_epochs 1 \
    --dataset_name "src/data_utils/${qa_dataset}.py" \
    --dataset_config_name "${qa_dataset}_ppo" \
    --length 50 \
    --gen_max_seq_length 150 \
    --batch_size "${batch_size}" \
    --per_device_eval_batch_size "${batch_size}" \
    --qa_tokenizer_name "save/roberta-base-${qa_dataset}" \
    --qa_model_name_or_path "save/roberta-base-${qa_dataset}" \
    --ppo_tokenizer_name "save/gpt2-${rewriter}" \
    --ppo_model_name_or_path "save/gpt2-${rewriter}" \
    --lr 1e-7 \
    --output_dir save \
    --exp "gpt2-${rewriter}-${qa_dataset}" \
    --repetition_penalty 1.1 \
    --num_beams 5 \
    --vf_coef 0.5 \
    --xent_init_length 3 \
    --mixer_steps 12000 \
    --xent_min_length 1
}

# The runs execute sequentially; a failing run does not prevent the
# following ones from starting.

# QReCC + CoQA
run_ppo_experiment qrecc coqa 16
# CANARD + CoQA
run_ppo_experiment canard coqa 16
# QReCC + QuAC
run_ppo_experiment qrecc quac 8
# CANARD + QuAC
run_ppo_experiment canard quac 8