forked from NVIDIA/Megatron-LM
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: offline_feature_extract.sh
More file actions
executable file
·38 lines (29 loc) · 976 Bytes
/
offline_feature_extract.sh
File metadata and controls
executable file
·38 lines (29 loc) · 976 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/bin/bash
# Launch offline feature extraction for a Megatron-LM model.
#
# Required environment (defined by conf/arguments.sh or the caller):
#   LAUNCH_SCRIPT, MODEL_ARGS, TP, ETP, EP, PP, TOKENIZER_MODEL, MLM_MODEL_CKPT
# Optional overrides:
#   MLM_DATA_ARGS  - dataset arguments (defaults set below if empty/unset)
#   MLM_EXTRA_ARGS - extra arguments appended to the command line

# Resolve the directory containing this script so companion files are found
# regardless of the caller's working directory.
SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"

# Common arguments and base model specific arguments
source "${SCRIPT_DIR}/conf/arguments.sh"

# Set up cache dir for HF to avoid out of space error
export HF_DATASETS_CACHE="/tmp/hf_datasets_cache"

# Extra arguments of this script
MLM_DEFAULT_ARGS=" \
    --distributed-timeout-minutes 30 \
    --auto-detect-ckpt-format \
    --export-te-mcore-model \
    --finetune \
"

# Apply default dataset arguments only when the caller supplied none.
# The expansion must be quoted (and use :- for the unset case): an unquoted
# ${MLM_DATA_ARGS} word-splits inside [ ], so any multi-word override would
# make the -z test error out instead of being honored (ShellCheck SC2086).
if [ -z "${MLM_DATA_ARGS:-}" ]; then
    MLM_DATA_ARGS=" \
        --num-samples 128000 \
        --finetune-hf-dataset nvidia/Daring-Anteater \
    "
fi

# LAUNCH_SCRIPT, MODEL_ARGS, MLM_DATA_ARGS, MLM_DEFAULT_ARGS and
# MLM_EXTRA_ARGS are intentionally left unquoted: they hold
# whitespace-separated argument lists that must word-split.
# Single-value expansions are quoted so paths with spaces survive.
${LAUNCH_SCRIPT} "${SCRIPT_DIR}/offline_feature_extract.py" \
    ${MODEL_ARGS} \
    --tensor-model-parallel-size "${TP}" \
    --expert-tensor-parallel-size "${ETP}" \
    --expert-model-parallel-size "${EP}" \
    --pipeline-model-parallel-size "${PP}" \
    --tokenizer-model "${TOKENIZER_MODEL}" \
    --load "${MLM_MODEL_CKPT}" \
    ${MLM_DATA_ARGS} \
    ${MLM_DEFAULT_ARGS} ${MLM_EXTRA_ARGS:-}