Skip to content

Commit 1bfbdb1

Browse files
authored
vocab : adopt leading TemplateProcessing special token as BOS (#24428)
1 parent 68f3066 commit 1bfbdb1

1 file changed

Lines changed: 5 additions & 0 deletions

File tree

gguf-py/gguf/vocab.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -256,6 +256,11 @@ def _try_load_from_tokenizer_json(self, path: Path) -> bool:
256256
if special_first := tmpl_single[0].get('SpecialToken', {}).get('id'):
257257
if not tokenizer_config:
258258
special_bos = special_first
259+
elif special_first not in (special_bos, special_cls):
260+
if not special_bos:
261+
tokenizer_config['bos_token'] = special_bos = special_first
262+
if not special_cls:
263+
tokenizer_config['cls_token'] = special_cls = special_first
259264
self.add_special_token['bos'] = True if special_first in (special_bos, special_cls) else False
260265
if special_first not in (special_bos, special_cls):
261266
logger.warning(f'Unknown leading special token {special_first!r} in TemplateProcessing<single>')

0 commit comments

Comments (0)