From f0cc8044f34073fd10f284f15273eb2f5fba4a5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Clizhenyu=E2=80=9D?= Date: Fri, 30 Jul 2021 16:26:07 +0800 Subject: [PATCH] fix load_entity_dict_zeshel function: truncate entity text to the first 128 whitespace-separated tokens instead of the first 256 characters --- blink/biencoder/zeshel_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/blink/biencoder/zeshel_utils.py b/blink/biencoder/zeshel_utils.py index d3dc3594..17cc48f5 100644 --- a/blink/biencoder/zeshel_utils.py +++ b/blink/biencoder/zeshel_utils.py @@ -56,7 +56,7 @@ def load_entity_dict_zeshel(logger, params): line = line.rstrip() item = json.loads(line) text = item["text"] - doc_list.append(text[:256]) + doc_list.append(" ".join(text.split()[:128])) if params["debug"]: if len(doc_list) > 200: