We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent b1b42f9, commit 3917907. Copy full SHA for 3917907
examples/scripts/preprocess_data.py
@@ -27,8 +27,12 @@
27
from primus.backends.megatron.training.tokenizer.tokenizer import (
28
_add_extra_tokenizer_args as _add_tokenizer_args,
29
)
30
+
31
+# isort: off
32
from primus.backends.megatron.training.tokenizer.tokenizer import build_tokenizer
33
34
+# isort: on
35
36
37
# https://stackoverflow.com/questions/33139531/preserve-empty-lines-with-nltks-punkt-tokenizer
38
class CustomLanguageVars(PunktLanguageVars):
0 commit comments