test_TF_serving_docker.py
from transformers import AutoTokenizer  # TFAutoModel is not needed here; inference runs inside TF Serving
import requests
import json
import numpy as np
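# NOTE (assumption, not part of the original script): this client expects a
# TensorFlow Serving container already running and exposing the SavedModel under
# the name "bert" on port 8501, started with something like (the source path is
# a placeholder):
#   docker run -p 8501:8501 \
#     --mount type=bind,source=/path/to/saved_model,target=/models/bert \
#     -e MODEL_NAME=bert -t tensorflow/serving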
sentence = "Hallo Dunia"  # "Hello World" in Indonesian, matching the en-id model
# Load the corresponding tokenizer of our SavedModel
tokenizer = AutoTokenizer.from_pretrained("carlesoctav/multi-qa-en-id-mMiniLMv2-L6-H384")
# Tokenize the sentence
batch = tokenizer(sentence)
# Convert the batch into a proper dict
batch = dict(batch)
# Put the example into a list of size 1, which corresponds to the batch size
batch = [batch]
print(batch)
# The REST API expects a JSON payload with an "instances" key listing the examples to process
input_data = {"instances": batch}
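# For reference, the payload looks like this (the exact keys depend on the
# tokenizer; BERT-style tokenizers also emit token_type_ids):
# {"instances": [{"input_ids": [...], "attention_mask": [...]}]}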
# Query the REST API; the path follows http://host:port/v1/models/<model_name>:<method>
r = requests.post("http://localhost:8501/v1/models/bert:predict", data=json.dumps(input_data))
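# Defensive check (an addition, not in the original script): fail fast with the
# server's HTTP error instead of a KeyError when parsing the response below.
r.raise_for_status()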
# Parse the JSON response. The outputs are in a list under the root key "predictions";
# since there is only one example, take the first element of the list. The trailing [0]
# then selects the hidden state of the first token.
result = json.loads(r.text)["predictions"][0]["last_hidden_state"][0]
print(result)
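# Sketch (assumption, not part of the original script): this checkpoint is a
# sentence-embedding model, so a single sentence vector is typically obtained by
# mean-pooling the token embeddings, weighted by the attention mask so that
# padding tokens (in padded batches) are ignored:
hidden = np.array(json.loads(r.text)["predictions"][0]["last_hidden_state"])
mask = np.array(batch[0]["attention_mask"])[:, None]
sentence_embedding = (hidden * mask).sum(axis=0) / mask.sum()
print(sentence_embedding.shape)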