# train-gpt-2.sh
# Launches dp_main.py under nohup with the settings below and writes its
# output to "$NOHUP_LOG_DIR/$TAG.log".
#
# The command runs in the foreground (there is no trailing '&'), so this
# script blocks until python exits. Edit the placeholder paths
# (/your/checkpoint/path, /your/log/path, /your/tokenizer/path) and TAG
# before running.

LOCAL_BATCH_SIZE=64   # forwarded as --batch-size
GRAD_ACC=8            # forwarded as --grad-acc
NPROC=1               # NOTE(review): not referenced anywhere in this script
TAG=your-tag          # forwarded as --tag; also names the nohup log file
PORT=13222            # NOTE(review): not referenced anywhere in this script

# Directory that receives the nohup log file.
NOHUP_LOG_DIR=/inspire/hdd/project/yunweiyuhuifu/p-shangli/cmy/Metis-Hif4/nohupout

# The output redirection below fails if the directory is missing, so create
# it up front and stop early when that is not possible.
mkdir -p -- "$NOHUP_LOG_DIR" || exit 1

# Redirections are kept at the end of the command so they cannot be mistaken
# for arguments; 2>&1 sends stderr to the same log regardless of whether
# stderr is a terminal.
# NOTE(review): --q-backward-outputgrad uses "nvfp4e2m1b" while the other
# --q-* options use "nvfp4e2m1bnosr" — confirm this difference is intended.
nohup python dp_main.py \
  --chkpt-dir /your/checkpoint/path \
  --dataset-path ./dataset \
  --log-dir /your/log/path \
  --tokenizer-path /your/tokenizer/path \
  --device 5 \
  --tag "$TAG" \
  --reg-lambda 0 \
  --layers 12 \
  --embed-dim 768 \
  --max-epochs 4 \
  --heads 12 \
  --lr-warmup-steps 50 \
  --grad-clipping 2.0 \
  --win-size 256 \
  --forward-svd-warmup-steps 0 \
  --forward-svd-merge-steps -1 \
  --batch-size "$LOCAL_BATCH_SIZE" \
  --lr 1e-4 \
  --merged-lr 1e-4 \
  --grad-acc "$GRAD_ACC" \
  --train-steps 400000 \
  --q-forward-input nvfp4e2m1bnosr \
  --q-forward-weight nvfp4e2m1bnosr \
  --q-backward-input nvfp4e2m1bnosr \
  --q-backward-weight nvfp4e2m1bnosr \
  --q-backward-outputgrad nvfp4e2m1b \
  --enable-lowbit \
  --save-steps 10000 \
  --enable-forward-svd \
  --forward-lowrank-svd 16 \
  --enable-backward-svd \
  --backward-lowrank-svd 16 \
  --backward-lowrank-niter 2 \
  --backward-broadcast-dim -1 \
  > "$NOHUP_LOG_DIR/$TAG.log" 2>&1