forked from allenai/specter
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path4-run-training-script-thesis.sh
More file actions
executable file
·68 lines (54 loc) · 2.37 KB
/
4-run-training-script-thesis.sh
File metadata and controls
executable file
·68 lines (54 loc) · 2.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/bin/bash
#
# Fine-tunes the thesis model by delegating to
# ./scripts/run-exp-simple_thesis.sh.
#
# Usage:
#   <this script>          full training, output in thesis-model-output
#   <this script> --demo   demo training, output in thesis-model-output-demo
#
# Behaviour:
#   * Exports USE_HF_SPECIAL_TOKENS=True for the training script.
#   * Deletes any previous output directory of the selected mode.
#   * Reads the number of training instances from the "train" key of
#     data-metrics.json in the selected preprocessed data directory (needs jq).
#   * Exits non-zero when a prerequisite is missing or invalid, and exits with
#     the training script's own status when training fails.
set -x

# Print an error message to stderr and abort the script.
die() {
  printf '[!] %s\n' "$*" >&2
  exit 1
}

echo "[*] Starting training script..."
echo "[*] Creating training files (thesis version).. "
export USE_HF_SPECIAL_TOKENS=True

# Mode selection. The substring match on the joined arguments is kept on
# purpose so that every invocation that used to select the demo still does.
if [[ "$*" == *--demo* ]]; then
  echo "[*] Using DEMO samples for training.. ====[NOT THE REAL FULL DATA]===="
  OUTPUT_PATH="thesis-model-output-demo"
  data_dir="thesis_data/preprocessed_demo"
  batch_size=2
  # The demo run loads the BERT weights from the local archive.
  bert_weights="thesis_data/finnish_bert_cased/finnish_bert.tar.gz"
else
  echo "[*] Using all the samples for training.. ====[THIS IS THE FULL TRAINING]===="
  OUTPUT_PATH="thesis-model-output"
  data_dir="thesis_data/preprocessed"
  batch_size=4
  # The full run refers to the weights by model name; the local archive
  # thesis_data/finnish_bert_cased/finnish_bert.tar.gz is the alternative.
  bert_weights="TurkuNLP/bert-base-finnish-cased-v1"
fi

# From here on, a typo in a variable name is an error instead of an empty string.
set -u

# Start from a clean output directory. Only claim a removal that happened;
# ${OUTPUT_PATH:?} guards against ever running rm -r on an empty path.
if [[ -e "$OUTPUT_PATH" ]]; then
  rm -r -- "${OUTPUT_PATH:?}" || die "Could not remove ${OUTPUT_PATH}"
  echo "[*] Removed data with rm -r ${OUTPUT_PATH}"
else
  echo "[*] No previous output found at ${OUTPUT_PATH}, nothing to remove"
fi

metrics_file="${data_dir}/data-metrics.json"
echo "[*] Make sure you have a correct number or training instances"
echo "[*] Reading number of training instances from ${metrics_file}"

command -v jq >/dev/null || die "jq is required to read ${metrics_file}"
[[ -r "$metrics_file" ]] || die "Cannot read ${metrics_file}"
training_instances_count=$(jq -r '.train' "$metrics_file") \
  || die "jq could not parse ${metrics_file}"
# jq prints "null" for a missing key; refuse anything that is not a plain count.
[[ "$training_instances_count" =~ ^[0-9]+$ ]] \
  || die "Invalid .train value '${training_instances_count}' in ${metrics_file}"
echo "[*] Using $training_instances_count as training instances"

# Arguments are collected in an array so each value stays a single word.
train_args=(
  -c experiment_configs/original.jsonnet
  -s "${OUTPUT_PATH}/"
  --num-epochs 2
  --batch-size "$batch_size"
  --train-path "${data_dir}/data-train.p"
  --dev-path "${data_dir}/data-val.p"
  --bert-vocab thesis_data/finnish_bert_cased/vocab.txt
  --bert-weights "$bert_weights"
  --vocab thesis_data/finnish_bert_cased/vocabulary
  --num-train-instances "$training_instances_count"
  --cuda-device -1
)

./scripts/run-exp-simple_thesis.sh "${train_args[@]}"
train_status=$?
if (( train_status != 0 )); then
  # Do not report success when the training script failed.
  printf '[!] Training failed with exit status %s\n' "$train_status" >&2
  exit "$train_status"
fi

echo "[*] DONE. Training finished.."
echo "[*] Next Step: See finetuned model in $OUTPUT_PATH"