We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 68f3066 · commit 1bfbdb1 · Copy full SHA for 1bfbdb1
1 file changed
gguf-py/gguf/vocab.py
@@ -256,6 +256,11 @@ def _try_load_from_tokenizer_json(self, path: Path) -> bool:
256
if special_first := tmpl_single[0].get('SpecialToken', {}).get('id'):
257
if not tokenizer_config:
258
special_bos = special_first
259
+ elif special_first not in (special_bos, special_cls):
260
+ if not special_bos:
261
+ tokenizer_config['bos_token'] = special_bos = special_first
262
+ if not special_cls:
263
+ tokenizer_config['cls_token'] = special_cls = special_first
264
self.add_special_token['bos'] = True if special_first in (special_bos, special_cls) else False
265
if special_first not in (special_bos, special_cls):
266
logger.warning(f'Unknown leading special token {special_first!r} in TemplateProcessing<single>')
0 commit comments