-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathrun_conll_srl.sh
58 lines (47 loc) · 2.28 KB
/
run_conll_srl.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/bin/bash
#SBATCH --cpus-per-task=3
#SBATCH --gres=gpu:rtx8000:1   # Ask for 1 GPU
#SBATCH --mem=32G              # Ask for 32 GB of CPU RAM
#SBATCH --time=12:00:00        # The job will run for 12 hours
#SBATCH --output=%x-%j.out
#SBATCH --error=%x-%j.err
#
# SLURM job: train a syntax-augmented BERT ("syntax_bert_tok") SRL model on
# the CoNLL-2005 dataset (UDv2 dependency conversion), evaluating during
# training on the WSJ split, then run a final eval-only pass on the Brown
# split using the checkpoint produced by the training run.
#
# Required layout (relative to the submit directory):
#   datasets/${DATASET_NAME}.tar.gz   — packed dataset
#   ${BERT_WEIGHTS}/${model}/         — pretrained HF BERT weights
#   config/srl/bert-base/${SYNTAX_MODEL_TYPE}.json — model config
#
# Strict mode: exit on error (-e), unset vars (-u), pipeline failures
# (pipefail); -x traces every command into the job log.
set -euxo pipefail

# Activate your virtual environment locally
source "${HOME}/.virtualenvs/pyt/bin/activate"

export DATASET_NAME='conll2005_srl_udv2'
export SEED=40
export SYNTAX_MODEL_TYPE='joint_fusion'
export model='bert-base-cased'
export BERT_WEIGHTS='hf-bert-weights'

# Save the model runs here
export SAVEDIR='checkpoints/1/'
mkdir -p "${SAVEDIR}"

# :<<COMMENT
cp "datasets/${DATASET_NAME}.tar.gz" "${SAVEDIR}"
cp -r "${BERT_WEIGHTS}/${model}" "${SAVEDIR}"
# Untar your dataset
tar -xvzf "${SAVEDIR}/${DATASET_NAME}.tar.gz" -C "${SAVEDIR}"
# COMMENT

# Launch your job: dataset, cached BERT weights, and checkpoints all live
# under ${SAVEDIR}.
export DATA_DIR=${SAVEDIR}
export CACHE_MODEL_PATH=${SAVEDIR}/${model}

# Remove stale checkpoints and cached feature files from previous runs.
# (glob intentionally unquoted so cached_* expands)
rm -rf "${SAVEDIR}/${DATASET_NAME}/checkpoint-best-model"
rm -rf "${SAVEDIR}/${DATASET_NAME}"/cached_*

# Train and Evaluate on WSJ Split
export TASK_NAME='conll2005wsj_srl'
python main.py --model_type "syntax_bert_tok" --model_name_or_path "${CACHE_MODEL_PATH}" --task_name "${TASK_NAME}" \
  --data_dir "${DATA_DIR}/${DATASET_NAME}" --max_seq_length 512 --per_gpu_eval_batch_size 32 \
  --output_dir "${SAVEDIR}/${DATASET_NAME}/" --save_steps 2000 \
  --overwrite_output_dir --num_train_epochs 20 --do_eval --do_train --evaluate_during_training \
  --config_name_or_path "config/srl/bert-base/${SYNTAX_MODEL_TYPE}.json" --per_gpu_train_batch_size 16 \
  --gradient_accumulation_steps 1 --wordpiece_aligned_dep_graph --seed "${SEED}"

# Evaluate on Brown Split.
# NOTE(review): the original pointed --output_dir at ${SLURM_TMPDIR}, which is
# inconsistent with the WSJ run above (trained into ${SAVEDIR}) and is wiped
# when the job ends; eval-only runs must use the directory holding the trained
# checkpoint, so this now reuses ${SAVEDIR}.
export TASK_NAME='conll2005brown_srl'
python main.py --model_type "syntax_bert_tok" --model_name_or_path "${CACHE_MODEL_PATH}" --task_name "${TASK_NAME}" \
  --data_dir "${DATA_DIR}/${DATASET_NAME}" --max_seq_length 512 --per_gpu_eval_batch_size 32 \
  --output_dir "${SAVEDIR}/${DATASET_NAME}/" --save_steps 1000 \
  --overwrite_output_dir --num_train_epochs 20 --do_eval \
  --config_name_or_path "config/srl/bert-base/${SYNTAX_MODEL_TYPE}.json" --per_gpu_train_batch_size 16 \
  --gradient_accumulation_steps 1 --wordpiece_aligned_dep_graph

deactivate