forked from allenai/specter
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path6-embed-script-finetuned-thesis.sh
More file actions
executable file
·54 lines (45 loc) · 2.03 KB
/
6-embed-script-finetuned-thesis.sh
File metadata and controls
executable file
·54 lines (45 loc) · 2.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/bin/bash
#
# Embed papers with the fine-tuned model by calling scripts/embed.py.
#
# Usage:
#   ./6-embed-script-finetuned-thesis.sh [--demo]
#     --demo   embed the demo dataset instead of the full dataset
#
# Inspect one record of the sample metadata. The paper id must be written in
# the .["<id>"] form for jq:
#   cat data/sample-metadata.json | jq '.["008b5715a2e3a52674edc325853577de86588681"]'
#
# Inspect one record of the output:
#   cat output.jsonl | jq 'select(.paper_id=="<paper_id>")'

# Abort on the first failing command, on use of an unset variable, and on a
# failed pipeline stage, so a failed embedding run is not followed by the
# "See results" / "Done.." success messages.
set -euo pipefail
# Trace every command on stderr as it runs.
set -x

# Exported so child processes can see it.
# NOTE(review): presumably read by the Python embedding code - confirm.
export USE_HF_SPECIAL_TOKENS=True

# NOTE(review): this banner names the demo metadata path even when the full
# dataset is embedded; the string is left unchanged here.
echo "[*] Embedding samples with finetuned model ./thesis_data/inference_demo/paper-metadata-cls.json"

# Prefix the output file name with the current timestamp (second resolution).
ts=$(date +%Y%m%d_%H%M%S)
output_filename="${ts}_myown_embeddings_cls.jsonl"
# Pick the input files for the requested mode, then run the embedder once.
# "--demo" anywhere in the arguments selects the demo dataset; otherwise the
# full dataset is embedded. Only the four paths below differ between modes.
if [[ "$*" == *--demo* ]]; then
  echo "[*] Embedding DEMO"
  ids_file="./thesis_data/inference_demo/sample.ids"
  metadata_file="./thesis_data/inference_demo/paper-metadata-cls.json"
  model_archive="./thesis-model-output-demo/model.tar.gz"
  # NOTE(review): this path starts with ".thesis_specter" (no "./"), unlike
  # every other path in this script - confirm it is intended.
  vocab_dir=".thesis_specter/thesis-model-output-demo/vocabulary/"
else
  echo "[*] Embedding with FULL DATA"
  ids_file="./thesis_data/inference/sample.ids"
  metadata_file="./thesis_data/inference/paper-metadata-cls.json"
  model_archive="./thesis-model-output/model.tar.gz"
  vocab_dir="./thesis_data/vocab/finnish_bert_cased/vocabulary/"
fi

# Both modes write to the same results directory.
# NOTE(review): full-data results also land under inference_demo/results -
# confirm this is intended.
OUTPUT_FILE="./thesis_data/inference_demo/results/${output_filename}"

# Make sure the results directory exists before starting the run.
mkdir -p -- "${OUTPUT_FILE%/*}" || exit 1

# --cuda-device: 0 = use GPU, -1 = use CPU
python scripts/embed.py \
  --ids "$ids_file" \
  --metadata "$metadata_file" \
  --model "$model_archive" \
  --output-file "$OUTPUT_FILE" \
  --vocab-dir "$vocab_dir" \
  --batch-size 16 \
  --cuda-device -1 || {
  # Propagate the embedder's exit status instead of announcing results.
  status=$?
  echo "[!] scripts/embed.py failed with exit status ${status}" >&2
  exit "$status"
}

echo "See results in $OUTPUT_FILE"
# Kept for reference: a direct predict invocation, one argument per line.
# (The original note here reads: "The script is converted to shell script".)
# python specter/predict_command.py predict
# ./model.tar.gz
# data/sample.ids
# --include-package specter
# --predictor specter_predictor
# --overrides "{'model':{'predict_mode':'true','include_venue':'false'},'dataset_reader':{'type':'specter_data_reader','predict_mode':'true','paper_features_path':'data/sample-metadata.json','included_text_fields': 'abstract title'},'vocabulary':{'directory_path':'data/vocab/'}}" --cuda-device -1 --output-file output.jsonl --batch-size 16 --silent

# Final marker printed once the script reaches its end.
printf '%s\n' "Done.."