@@ -154,7 +154,8 @@ deployment_groups:
 from transformers import AutoTokenizer
 import numpy as np
 tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
-tokenized_data = tokenizer(dataset["sentence"], return_tensors="np", padding=True)
+sentences = [str(s) for s in dataset["sentence"]]
+tokenized_data = tokenizer(sentences, return_tensors="np", padding=True)
 tokenized_data = dict(tokenized_data)
 labels = np.array(dataset["label"])
 from transformers import TFAutoModelForSequenceClassification
@@ -195,7 +196,8 @@ deployment_groups:
 from transformers import AutoTokenizer
 import numpy as np
 tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
-tokenized_data = tokenizer(dataset["sentence"], return_tensors="np", padding=True)
+sentences = [str(s) for s in dataset["sentence"]]
+tokenized_data = tokenizer(sentences, return_tensors="np", padding=True)
 tokenized_data = dict(tokenized_data)
 labels = np.array(dataset["label"])
 from transformers import TFAutoModelForSequenceClassification
@@ -236,7 +238,8 @@ deployment_groups:
 from transformers import AutoTokenizer
 import numpy as np
 tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
-tokenized_data = tokenizer(dataset["sentence"], return_tensors="np", padding=True)
+sentences = [str(s) for s in dataset["sentence"]]
+tokenized_data = tokenizer(sentences, return_tensors="np", padding=True)
 tokenized_data = dict(tokenized_data)
 labels = np.array(dataset["label"])
 from transformers import TFAutoModelForSequenceClassification
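For context, a minimal sketch of why the added cast matters, assuming the dataset's "sentence" column can contain non-string entries (for example None); the dataset literal below is hypothetical, while the column, model, and variable names mirror the hunks above:

    from transformers import AutoTokenizer
    import numpy as np

    # Hypothetical dataset with a non-string entry in the "sentence" column.
    dataset = {"sentence": ["a good example", None], "label": [1, 0]}

    tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

    # Tokenizers accept only str (or lists of str) inputs, so passing the raw
    # column with a None entry raises a ValueError; casting each entry to str
    # first keeps the call working.
    sentences = [str(s) for s in dataset["sentence"]]

    tokenized_data = dict(tokenizer(sentences, return_tensors="np", padding=True))
    labels = np.array(dataset["label"])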