-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathpredict.py
58 lines (44 loc) · 1.88 KB
/
predict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import os
import argparse
import spacy
import numpy as np
from keras.models import load_model
from dataset.data_processor import numericalize
from utils.serialization import load_object
from constants import NO_ENTITY_TOKEN, MAX_LEN_CHAR
def parse_args(argv=None):
    """Parse command-line options for the NER prediction script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the
            default), argparse reads ``sys.argv[1:]``.

    Returns:
        The parsed ``argparse.Namespace``. A non-empty ``path`` is normalized
        to end with a path separator, so a file name can be appended to it
        directly; an empty ``path`` stays empty.

    Raises:
        SystemExit: Raised by argparse when ``--path`` is missing or the
            arguments are otherwise invalid.
    """
    parser = argparse.ArgumentParser(description='Script for using NER model')
    # Required: without it ``args.path`` would be None and the normalization
    # below would fail with an unhelpful TypeError.
    parser.add_argument('-p', '--path', required=True,
                        help='Path to model and vocabulary directory.')
    args = parser.parse_args(argv)
    # Joining with an empty component appends a trailing separator only when
    # one is missing, and keeps an empty path empty instead of raising
    # IndexError on ``path[-1]``.
    args.path = os.path.join(args.path, '')
    return args
def _predict_labels(model, vocabs, doc):
    """Return one predicted label string per token of the spaCy ``doc``.

    Args:
        model: Loaded Keras model; called with ``[words, pos, chars]``.
        vocabs: Object exposing ``words``, ``pos``, ``chars`` and ``labels``
            vocabularies (``labels.itos`` maps an index to a label string).
        doc: Tokenized sentence as returned by the spaCy pipeline.

    Returns:
        A list of label strings, one per token in ``doc``.
    """
    tokens = [token.text for token in doc]
    tags = [token.tag_ for token in doc]

    chars = numericalize(vocabs.chars, [list(token) for token in tokens],
                         NO_ENTITY_TOKEN, maxlen=MAX_LEN_CHAR)
    # Add a leading batch axis so the character input matches the other two.
    chars = np.array(chars)[np.newaxis, :, :]

    # The pad token is irrelevant here because we are numericalizing just one
    # sentence (it won't be padded).
    words = np.array(numericalize(vocabs.words, [tokens], NO_ENTITY_TOKEN))
    pos = np.array(numericalize(vocabs.pos, [tags], NO_ENTITY_TOKEN))

    # Drop only the batch axis. A bare ``.squeeze()`` would also drop the
    # token axis for a one-token sentence and make the argmax below fail.
    # NOTE(review): assumes predict() returns a batch-first array with batch
    # size 1 -- confirm against the model definition.
    scores = np.squeeze(model.predict([words, pos, chars]), axis=0)
    label_ids = np.argmax(scores, axis=-1).tolist()
    return [vocabs.labels.itos[label_idx] for label_idx in label_ids]


def main():
    """Interactively tag sentences typed by the user with the NER model.

    Loads the vocabularies and the model from the directory given by
    ``--path``, then repeatedly reads a sentence, tokenizes it with spaCy and
    prints each token next to its predicted label. Empty input is ignored;
    typing ``end`` (or sending end-of-file) leaves the loop.
    """
    args = parse_args()
    vocabs = load_object(os.path.join(args.path, 'vocabs'))
    model = load_model(os.path.join(args.path, 'model_ner'))
    # NOTE(review): the 'en' shortcut name is only available in older spaCy
    # releases -- confirm the installed version still resolves it.
    nlp = spacy.load('en')

    while True:
        try:
            user_input = input('Input sentence: ').strip()
        except EOFError:
            # stdin was closed (Ctrl-D / piped input exhausted): exit cleanly.
            print()
            break

        if not user_input:
            continue
        if user_input == 'end':
            break

        # tokenize user input
        doc = nlp(user_input)

        # get model output
        predicted_labels = _predict_labels(model, vocabs, doc)

        # print result
        for token, label in zip(doc, predicted_labels):
            print("%s %s" % (token.text, label))
        print()
# Start the interactive prediction loop only when this file is executed as a
# script; importing it as a module must not trigger any work.
if __name__ == "__main__":
    main()