-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathrun_lm_eval_harness_tput.sh
More file actions
executable file
·84 lines (74 loc) · 2.78 KB
/
run_lm_eval_harness_tput.sh
File metadata and controls
executable file
·84 lines (74 loc) · 2.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/bin/bash
# Launches scripts/eval/harness_eval.py through `accelerate launch` with the
# lm-eval-harness gsm8k task config and `+task.model.throughput_run=true`.
#
# Usage: run from a directory one level below the repo root (the script does
#        `cd ../` first and then uses repo-relative paths).
# Optional environment:
#   NUM_FEW_SHOT  Label used in the output directory name only (default: 0).

# Setup environment
cd ../ || exit # Go to the root directory of the repo
# NOTE(review): the result of sourcing is not checked; confirm setup_env.sh
# reports failure through its return status before adding `|| exit` here.
source setup_env.sh

# Forwarded below as tokenizer.pretrained_model_name_or_path.
QWEN_MODEL="Qwen/Qwen3-1.7B-Base"

# TODO: Uncomment a model and run
########### AR
#MODEL_PATH="outputs/<PATH_TO_AR_SAVED_MODEL_DIR>"
#BLOCK_SIZE=1
#KV_CACHING=true
#ALIGN_INPUTS_TO_BLOCKS=true
#USE_EMA=true
############ MDLM
#MODEL_PATH="outputs/<PATH_TO_MDLM_SAVED_MODEL_DIR>"
#BLOCK_SIZE=64
#KV_CACHING=false
#ALIGN_INPUTS_TO_BLOCKS=false
#USE_EMA=true
############ BD3LM
#MODEL_PATH="outputs/<PATH_TO_BD3LM_SAVED_MODEL_DIR>"
#BLOCK_SIZE=4
#KV_CACHING=true
#ALIGN_INPUTS_TO_BLOCKS=true
#USE_EMA=true
######## E2D2
MODEL_PATH="kuleshov-group/e2d2-gsm8k-finetune-Qwen3-2B"
# MODEL_PATH="outputs/<PATH_TO_E2D2_SAVED_MODEL_DIR>"
BLOCK_SIZE=4
KV_CACHING=true
ALIGN_INPUTS_TO_BLOCKS=true
# USE_EMA is set once, per model section; it is intentionally not reassigned
# further down so the value chosen here is the one that is used.
USE_EMA=true

OUTPUT_DIR="outputs/${MODEL_PATH}/lm_eval_harness_output"
REVISION=null

# Generation settings (each is forwarded to the launcher below).
L=256              # -> max_new_tokens
T=${BLOCK_SIZE}    # -> generation_config.num_steps (tied to the block size)
DO_SAMPLE=false
SAMPLING_STRATEGY="predict_and_noise" # "predict_and_noise" or "posterior"
FIRST_HITTING=true
CONFIDENCE_BASED_NOISING=true
CONFIDENCE_MARGIN_BASED_NOISING=false
CKPT="best"        # -> task.model.ckpt_file="${CKPT}-rank0.pt"

# NUM_FEW_SHOT was referenced in the output path without ever being assigned,
# which produced a malformed "_shot_" segment. It only labels the directory;
# this script does not forward it to the harness.
# TODO(review): confirm the default matches the few-shot count the gsm8k task
# config actually uses.
NUM_FEW_SHOT="${NUM_FEW_SHOT:-0}"

# Directory name encodes the run settings; assembled piecewise for readability.
RUN_NAME="ema${USE_EMA}_ckpt${CKPT}_${NUM_FEW_SHOT}shot_L${L}_block_size${BLOCK_SIZE}"
RUN_NAME+="-do_sample${DO_SAMPLE}-sampling_strategy${SAMPLING_STRATEGY}-T${T}"
RUN_NAME+="_first_hitting${FIRST_HITTING}"
RUN_NAME+="-confidence_based_noising${CONFIDENCE_BASED_NOISING}"
RUN_NAME+="-confidence_margin_based_noising${CONFIDENCE_MARGIN_BASED_NOISING}"
RUN_NAME+="-align_inputs_to_blocks${ALIGN_INPUTS_TO_BLOCKS}"
OUTPUT_PATH="${OUTPUT_DIR}/${RUN_NAME}"

# `mkdir -p` creates OUTPUT_DIR as a parent, so one call is enough. Stop early
# if the directory cannot be created instead of launching a run with nowhere
# to write.
if ! mkdir -p -- "${OUTPUT_PATH}"; then
  printf 'error: cannot create output directory: %s\n' "${OUTPUT_PATH}" >&2
  exit 1
fi

# Overrides are collected in an array so every value is passed as exactly one
# argument (no word splitting or globbing) and groups can carry comments.
launch_args=(
  # Hydra runtime: keep the working directory, disable Hydra's own logging.
  "hydra.output_subdir=null"
  "hydra.run.dir=${PWD}"
  "hydra/job_logging=disabled"
  "hydra/hydra_logging=disabled"

  # Task and model.
  "+eval/lm_eval_harness@task=gsm8k"
  "pretrained_model_name_or_path=${MODEL_PATH}"
  "pretrained_model_revision=${REVISION}"
  "task.model.ckpt_file=${CKPT}-rank0.pt"
  "task.model.load_ema_weights=${USE_EMA}"
  "+task.model.throughput_run=true"
  "tokenizer.pretrained_model_name_or_path=${QWEN_MODEL}"

  # Outputs (results and generated samples share one directory).
  "output_path=${OUTPUT_PATH}"
  "generated_samples_output_path=${OUTPUT_PATH}"

  # Generation.
  "max_new_tokens=${L}"
  "block_size=${BLOCK_SIZE}"
  "generation_config.do_sample=${DO_SAMPLE}"
  "generation_config.sampling_strategy=${SAMPLING_STRATEGY}"
  "generation_config.num_steps=${T}"
  "generation_config.first_hitting=${FIRST_HITTING}"
  "generation_config.confidence_based_noising=${CONFIDENCE_BASED_NOISING}"
  "generation_config.confidence_margin_based_noising=${CONFIDENCE_MARGIN_BASED_NOISING}"
  "generation_config.confidence_threshold=1.1"
  "generation_config.use_cache=${KV_CACHING}"
  "generation_config.align_inputs_to_blocks=${ALIGN_INPUTS_TO_BLOCKS}"

  # Drop the logits-processor and stopping-criteria config groups and null the
  # matching gen_kwargs. Single-quoted so bash never attempts `~user` tilde
  # expansion on Hydra's leading `~`.
  '~generation/logits_processor@logits_processor_list'
  "gen_kwargs.logits_processor=null"
  '~generation/stopping_criteria@stopping_criteria_list'
  "gen_kwargs.stopping_criteria=null"
)

# Last command: the script's exit status is the launcher's exit status.
accelerate launch scripts/eval/harness_eval.py "${launch_args[@]}"