-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathrun_seq2seq_eval_cnndm_tput.sh
More file actions
executable file
·77 lines (69 loc) · 2.54 KB
/
run_seq2seq_eval_cnndm_tput.sh
File metadata and controls
executable file
·77 lines (69 loc) · 2.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#!/bin/bash
# Launches scripts/eval/seq2seq_eval.py through torchrun with the
# cnn_dailymail eval task and throughput_run=true.
#
# Usage: run from the directory that contains this script, so that `cd ../`
# lands in the repository root (where setup_env.sh is expected to live).

# Setup environment
cd ../ || exit  # Go to the root directory of the repo

# Bash only prints a warning when `source` cannot find its file and then keeps
# executing, which would launch the job with an unconfigured environment.
# Check for the file explicitly and stop with a clear message instead.
if [[ ! -f setup_env.sh ]]; then
  printf 'error: setup_env.sh not found in %s; run this script from its own directory\n' "${PWD}" >&2
  exit 1
fi
# shellcheck source=/dev/null
source setup_env.sh
# TODO: Uncomment a model and run
# Only one preset should be active at a time. Later assignments override
# earlier ones, so comment out the E2D2 preset below before enabling another.
# NOTE(review): RUN_DIR is not set in this script; presumably setup_env.sh
# provides it — confirm before using one of the local-checkpoint paths.
######## AR
#KV_CACHING=true
#ALIGN_INPUTS_TO_BLOCKS=true
#BLOCK_SIZE=1
#MODEL_PATH="${RUN_DIR}/<PATH_TO_AR_SAVED_MODEL_DIR>"

########### MDLM
#KV_CACHING=false
#ALIGN_INPUTS_TO_BLOCKS=false
#BLOCK_SIZE=32
#MODEL_PATH="${RUN_DIR}/<PATH_TO_MDLM_SAVED_MODEL_DIR>"

########### BD3LM
#KV_CACHING=true
#ALIGN_INPUTS_TO_BLOCKS=true
#BLOCK_SIZE=8
#MODEL_PATH="${RUN_DIR}/<PATH_TO_BD3LM_SAVED_MODEL_DIR>"
# NOTE(review): LEN_PENALTY is not referenced by the launch command in this
# script; kept on its own line so uncommenting MODEL_PATH stays safe.
#LEN_PENALTY=1.1

########### E2D2
BLOCK_SIZE=8
MODEL_PATH="kuleshov-group/e2d2-cnndm"
#MODEL_PATH="${RUN_DIR}/<PATH_TO_E2D2_SAVED_MODEL_DIR>"
KV_CACHING=true
ALIGN_INPUTS_TO_BLOCKS=false

# Results are grouped per model under outputs/.
OUTPUT_DIR="outputs/${MODEL_PATH}/cnn_dailymail"
REVISION=null
# Stop early if the output directory cannot be created, rather than
# discovering it after the evaluation has already run.
mkdir -p -- "${OUTPUT_DIR}" || exit 1

# Generation settings forwarded to the eval script as overrides.
L=256              # passed as max_new_tokens
T=${BLOCK_SIZE}    # passed as generation_config.num_steps
DO_SAMPLE=false
SAMPLING_STRATEGY="predict_and_noise" # "predict_and_noise" "posterior"
FIRST_HITTING=true
CONFIDENCE_BASED_NOISING=true
MAX_LENGTH=4096    # passed as max_length and model_config_overrides.length

# Checkpoint selection: the launch command loads "${CKPT}-rank0.pt".
CKPT="best"
USE_EMA=true       # passed as load_ema_weights

# The output name encodes the settings above. The separators are kept exactly
# as they were so that existing result paths continue to match.
OUTPUT_PATH="${OUTPUT_DIR}/L-${L}-block_size-${BLOCK_SIZE}-do_sample-${DO_SAMPLE}-sampling_strategy-${SAMPLING_STRATEGY}-first_hitting-${FIRST_HITTING}-confidence_based_noising-${CONFIDENCE_BASED_NOISING}-align_inputs_to_blocks${ALIGN_INPUTS_TO_BLOCKS}-ckpt${CKPT}-ema${USE_EMA}"
PORT=29504         # passed to torchrun as --master_port
# Launch the evaluation, one process per visible device.
#
# NUM_VISIBLE_DEVICES is not assigned anywhere in this script (presumably
# setup_env.sh exports it — confirm). If it were empty, the unquoted expansion
# would vanish and --nproc_per_node would be left without a value, so abort
# with a clear message instead.
: "${NUM_VISIBLE_DEVICES:?NUM_VISIBLE_DEVICES must be set before launching (expected from setup_env.sh)}"

# Notes on the overrides below (Hydra override syntax):
#   - a leading "+" adds a key that is not in the base config;
#   - a leading "~" removes an entry. Those words are single-quoted because
#     Bash would otherwise attempt tilde expansion ("~name" -> home directory
#     of user "name") on a word that starts with "~".
# All variable expansions are quoted so values reach the program as single,
# unmodified arguments.
torchrun --nproc_per_node "${NUM_VISIBLE_DEVICES}" --master_port="${PORT}" scripts/eval/seq2seq_eval.py \
  hydra.output_subdir=null \
  hydra.run.dir="${PWD}" \
  hydra/job_logging=disabled \
  hydra/hydra_logging=disabled \
  +eval/seq2seq@task=cnn_dailymail \
  pretrained_model_name_or_path="${MODEL_PATH}" \
  pretrained_model_revision="${REVISION}" \
  +model_config_overrides.length="${MAX_LENGTH}" \
  +ckpt_file="${CKPT}-rank0.pt" \
  +load_ema_weights="${USE_EMA}" \
  tokenizer.pretrained_model_name_or_path="Qwen/Qwen3-0.6B-Base" \
  output_path="${OUTPUT_PATH}" \
  generated_samples_output_path="${OUTPUT_PATH}" \
  max_length="${MAX_LENGTH}" \
  max_new_tokens="${L}" \
  block_size="${BLOCK_SIZE}" \
  generation_config.num_steps="${T}" \
  generation_config.do_sample="${DO_SAMPLE}" \
  generation_config.sampling_strategy="${SAMPLING_STRATEGY}" \
  generation_config.first_hitting="${FIRST_HITTING}" \
  generation_config.confidence_based_noising="${CONFIDENCE_BASED_NOISING}" \
  generation_config.use_cache="${KV_CACHING}" \
  generation_config.align_inputs_to_blocks="${ALIGN_INPUTS_TO_BLOCKS}" \
  '~generation/stopping_criteria@stopping_criteria_list' \
  gen_kwargs.stopping_criteria=null \
  '~generation/logits_processor@logits_processor_list' \
  gen_kwargs.logits_processor=null \
  +throughput_run=true