forked from allenai/specter
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path5-embed-script-baseline-finnish-bert-cased-v1.sh
More file actions
executable file
·56 lines (46 loc) · 2.05 KB
/
5-embed-script-baseline-finnish-bert-cased-v1.sh
File metadata and controls
executable file
·56 lines (46 loc) · 2.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/bin/bash
#
# Embed sample papers with the baseline Finnish BERT (cased, v1) model by
# calling scripts/embed.py on the CPU.
#
# Usage:
#   ./5-embed-script-baseline-finnish-bert-cased-v1.sh          # full sample set
#   ./5-embed-script-baseline-finnish-bert-cased-v1.sh --demo   # demo sample set
#
# The result is written to a timestamped .jsonl file in the "results"
# directory of the selected data set; its path is printed at the end.
# If scripts/embed.py fails, the script stops and exits with the same status.
#
# Read sample data like this. .["field"] is required with jq
#   cat data/sample-metadata.json | jq '.["008b5715a2e3a52674edc325853577de86588681"]'
#
# Read output data like this
#   cat output.jsonl | jq 'select(.paper_id=="<paper_id>")'

# Trace every command so the exact embed.py invocation shows up in the log.
set -x

export USE_HF_SPECIAL_TOKENS=True

# Timestamped name so repeated runs never overwrite an earlier result.
ts=$(date +%Y%m%d_%H%M%S)
output_filename="${ts}_baseline_finnishbert_cls_from_allennlp.jsonl"

# The two modes differ only in the data directory and the metadata file name.
# "--demo" is detected anywhere in the argument list (substring match).
if [[ "$*" == *--demo* ]]; then
  run_label="DEMO"
  data_dir="thesis_data/inference_demo"
  metadata_file="${data_dir}/paper-metadata-cls.json"
else
  run_label="FULL"
  data_dir="thesis_data/inference"
  metadata_file="${data_dir}/sample-metadata.json"
fi
OUTPUT_FILE="./${data_dir}/results/${output_filename}"

echo "[*] ${run_label} Embedding samples with baseline model..."

# --cuda-device: 0 = use GPU, -1 = use CPU
# Both modes load the model archive from the full "inference" directory.
python scripts/embed.py \
  --ids "${data_dir}/sample.ids" \
  --metadata "${metadata_file}" \
  --model ./thesis_data/inference/models/finnish_bert_base_cased_v1/finnish_bert.tar.gz \
  --output-file "${OUTPUT_FILE}" \
  --vocab-dir thesis_data/finnish_bert_cased/vocabulary/ \
  --batch-size 16 \
  --cuda-device -1 || {
  # Do not report success when the embedding step failed; pass its status on.
  status=$?
  echo "[!] scripts/embed.py failed with exit status ${status}" >&2
  exit "${status}"
}

echo "[*] Done embedding samples with baseline model..."
echo "[*] Output file: $OUTPUT_FILE"

# The script is converted to shell script
# python specter/predict_command.py predict
# ./model.tar.gz
# data/sample.ids
# --include-package specter
# --predictor specter_predictor
# --overrides "{'model':{'predict_mode':'true','include_venue':'false'},'dataset_reader':{'type':'specter_data_reader','predict_mode':'true','paper_features_path':'data/sample-metadata.json','included_text_fields': 'abstract title'},'vocabulary':{'directory_path':'data/vocab/'}}" --cuda-device -1 --output-file output.jsonl --batch-size 16 --silent
echo "Done.."