-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathrun.sh
More file actions
89 lines (80 loc) · 4.46 KB
/
run.sh
File metadata and controls
89 lines (80 loc) · 4.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# Run identifier: launch time in seconds since the epoch. Every artifact of
# this run is placed under exp/$date.
date=$(date +%s)
# date=1674664367  # fixed identifier left in by the author; presumably for re-using an earlier run's directories

# Per-run working directories: caches, predictions and model outputs for the
# s1 stage, the s2 stage, the validation pass and the retrieval pass.
mkdir -p \
  "exp/$date/cache/s1" \
  "exp/$date/cache/s2" \
  "exp/$date/cache/retrieval" \
  "exp/$date/prediction/s1" \
  "exp/$date/prediction/s2" \
  "exp/$date/prediction/valid" \
  "exp/$date/output/s1" \
  "exp/$date/output/s2" \
  "exp/$date/output/retrieval"
# Summary directories are not per-run. The second one has to be s2, because
# the s2 stage is launched with --summary_dir ./summary/s2.
mkdir -p summary/s1 summary/s2

# Settings handed to run_gen.py by the stages that follow.
batch_size=12          # used for both --train_batch_size and --eval_batch_size
max_source_length=512
max_target_length=100
output_dir="./exp/$date/output"
res_dir="./exp/$date/prediction"
cache_path="./exp/$date/cache"
task=$1                # first positional argument; forwarded as --sub_task
learning_rate=5e-5
# Stage 1: train and evaluate Salesforce/codet5-base with --data_type s1
# for 5 epochs.
# $task is interpolated into every stage's command line, so refuse to start
# a training run when the sub-task argument was not supplied.
if [ -z "$task" ]; then
  echo "usage: $0 <sub_task>" >&2
  exit 2
fi
CUDA_VISIBLE_DEVICES=0 \
python run_gen.py \
  --do_train --do_eval \
  --task summarize --sub_task "$task" --model_type codet5 --data_num -1 \
  --data_type s1 --num_train_epochs 5 --warmup_steps 1000 --learning_rate "$learning_rate" \
  --tokenizer_name=Salesforce/codet5-base --model_name_or_path=Salesforce/codet5-base --data_dir ./data \
  --cache_path "$cache_path/s1" --output_dir "$output_dir/s1" --summary_dir ./summary/s1 \
  --save_last_checkpoints --always_save_model --res_dir "$res_dir/s1" \
  --train_batch_size "$batch_size" --eval_batch_size "$batch_size" \
  --max_source_length "$max_source_length" --max_target_length "$max_target_length" \
  || { echo "stage 1 (s1) training failed; aborting" >&2; exit 1; }
# The s2 stage passes this checkpoint directory as --model_name_or_path.
# Presumably config.json must sit next to the weights for that load to work
# (TODO confirm against run_gen.py).
cp ./config.json "$output_dir/s1/checkpoint-best-ppl/"
# Stage 2: start from the stage-1 checkpoint-best-ppl directory, train with
# --data_type s2 for 10 epochs, evaluate (including BLEU) and run the test
# pass. Test results are written under $res_dir/s2.
# Every later step loads checkpoint-best-bleu from this stage, so stop here
# if the run fails.
CUDA_VISIBLE_DEVICES=0 \
python run_gen.py \
  --do_train --do_eval --do_eval_bleu --do_test \
  --task summarize --sub_task "$task" --model_type codet5 --data_num -1 \
  --data_type s2 --num_train_epochs 10 --warmup_steps 1000 --learning_rate "$learning_rate" \
  --tokenizer_name=Salesforce/codet5-base --model_name_or_path="$output_dir/s1/checkpoint-best-ppl" --data_dir ./data \
  --cache_path "$cache_path/s2" --output_dir "$output_dir/s2" --summary_dir ./summary/s2 \
  --save_last_checkpoints --always_save_model --res_dir "$res_dir/s2" \
  --train_batch_size "$batch_size" --eval_batch_size "$batch_size" \
  --max_source_length "$max_source_length" --max_target_length "$max_target_length" \
  || { echo "stage 2 (s2) training failed; aborting" >&2; exit 1; }
# The validation and retrieval passes pass this directory as
# --model_name_or_path; config.json is copied in alongside the checkpoint.
cp ./config.json "$output_dir/s2/checkpoint-best-bleu/"
# Start the validation pass from an empty s2 cache. Presumably this stops
# the cache written by the stage-2 run from being reused for valid.jsonl
# (TODO confirm against run_gen.py).
# -f and -p let the reset succeed when the directory is already missing;
# ${cache_path:?} aborts rather than touching "/s2" if the variable is empty.
rm -rf -- "${cache_path:?}/s2"
mkdir -p "$cache_path/s2"
# Run the test pass of the stage-2 checkpoint-best-bleu model on the
# validation file, writing results under $res_dir/valid (svm.py later reads
# test_best-bleu.* from there). The training flags are kept exactly as
# before; presumably they are unused when only --do_test is given.
CUDA_VISIBLE_DEVICES=0 \
python run_gen.py \
  --do_test --test_file "data/summarize/$task/valid.jsonl" \
  --task summarize --sub_task "$task" --model_type codet5 --data_num -1 \
  --data_type s2 --num_train_epochs 10 --warmup_steps 1000 --learning_rate "$learning_rate" \
  --tokenizer_name=Salesforce/codet5-base --model_name_or_path="$output_dir/s2/checkpoint-best-bleu" --data_dir ./data \
  --cache_path "$cache_path/s2" --output_dir "$output_dir/s2" --summary_dir ./summary/s2 \
  --save_last_checkpoints --always_save_model --res_dir "$res_dir/valid" \
  --train_batch_size "$batch_size" --eval_batch_size "$batch_size" \
  --max_source_length "$max_source_length" --max_target_length "$max_target_length" \
  || { echo "validation prediction failed; aborting" >&2; exit 1; }
# Retrieval pass with the stage-2 checkpoint-best-bleu model, run once for
# the validation split and once for the test split. The two invocations
# differ only in --retrieval_file, so they share one command.
# svm.py later reads <split>.output and <split>.score from
# $output_dir/retrieval; presumably those files are written here.
for split in valid test; do
  CUDA_VISIBLE_DEVICES=0 \
  python -W ignore run_gen.py \
    --do_retrieval --retrieval_file "$split" \
    --task summarize --sub_task "$task" --model_type codet5 --data_num -1 \
    --tokenizer_name=Salesforce/codet5-base --model_name_or_path="$output_dir/s2/checkpoint-best-bleu/" --data_dir ./data \
    --data_type s2 --output_dir "$output_dir/retrieval" \
    --cache_path "$cache_path/retrieval" --summary_dir ./summary/s1 --res_dir "$res_dir/s1" \
    --train_batch_size 32 --eval_batch_size 32 --max_source_length "$max_source_length" \
    || { echo "retrieval on the $split split failed; aborting" >&2; exit 1; }
done
# Final step: hand svm.py the retrieval results ($output_dir/retrieval) and
# the generation results ($res_dir/valid, $res_dir/s2) for the validation
# and test splits; its result goes to $res_dir/svm.
# Paths use the same output_dir/res_dir variables as the earlier stages, so
# they cannot drift apart from where those stages wrote.
# NOTE(review): presumably svm.py is fitted on the validation inputs and
# applied to the test inputs -- confirm in svm.py.
python -W ignore svm.py \
  -valid_retrieval_msg "$output_dir/retrieval/valid.output" \
  -valid_retrieval_bleu "$output_dir/retrieval/valid.score" \
  -valid_generate_msg "$res_dir/valid/test_best-bleu.output" \
  -valid_generate_score "$res_dir/valid/test_best-bleu.score" \
  -ground_truth "$res_dir/valid/test_best-bleu.gold" \
  -test_retrieval_msg "$output_dir/retrieval/test.output" \
  -test_retrieval_bleu "$output_dir/retrieval/test.score" \
  -test_generate_msg "$res_dir/s2/test_best-bleu.output" \
  -test_generate_score "$res_dir/s2/test_best-bleu.score" \
  -test_ground_truth "$res_dir/s2/test_best-bleu.gold" \
  -output "$res_dir/svm"