Description
The moss-moon-003-sft-plugin model published on ModelScope fails to deploy with the provided Jupyter test example. It fails both on my own server and on the instance provided by ModelScope, with the error `AttributeError: 'MossTokenizer' object has no attribute 'encoder'`. The GPU is an A40; I am not sure whether that matters.
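For context, the failing load looks roughly like this (a minimal sketch, not the notebook's exact code; the model id namespace and prompt are placeholders/assumptions). Per the traceback below, the pipeline's ms_wrapper ends up calling `AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)`, and that is where the error is raised.

```python
# Minimal sketch of the failing path (model id namespace is an assumption).
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Building the pipeline runs ms_wrapper.py, which loads the tokenizer via
# AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True) -- the
# AttributeError below is raised inside that call, before any model weights load.
pipe = pipeline(
    task=Tasks.text_generation,
    model='AI-ModelScope/moss-moon-003-sft-plugin',  # placeholder model id
)
print(pipe('<|Human|>: Hi there<eoh>\n<|MOSS|>:'))  # prompt format per MOSS convention
```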
```
AttributeError Traceback (most recent call last)
File /opt/conda/lib/python3.10/site-packages/modelscope/utils/registry.py:212, in build_from_cfg(cfg, registry, group_key, default_args)
211 else:
--> 212 return obj_cls(**args)
213 except Exception as e:
214 # Normal TypeError does not print class name.
File ~/.cache/modelscope/modelscope_modules/moss-moon-003-sft-plugin/ms_wrapper.py:21, in mossmoon003sftpluginTextGenerationPipeline.__init__(self, model, *args, **kwargs)
16 def __init__(
17 self,
18 model: Union[Model, str],
19 *args,
20 **kwargs):
---> 21 model = mossmoon003sftpluginTextGeneration(model) if isinstance(model, str) else model
22 super().__init__(model=model, **kwargs)
File ~/.cache/modelscope/modelscope_modules/moss-moon-003-sft-plugin/ms_wrapper.py:43, in mossmoon003sftpluginTextGeneration.__init__(self, model_dir, *args, **kwargs)
42 # loading tokenizer
---> 43 self.tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
44 self.model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True).half()
File /opt/conda/lib/python3.10/site-packages/transformers/models/auto/tokenization_auto.py:774, in AutoTokenizer.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
773 tokenizer_class.register_for_auto_class()
--> 774 return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
775 elif config_tokenizer_class is not None:
File /opt/conda/lib/python3.10/site-packages/modelscope/utils/hf_util.py:52, in patch_tokenizer_base.<locals>.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
51 model_dir = pretrained_model_name_or_path
---> 52 return ori_from_pretrained(cls, model_dir, *model_args, **kwargs)
File /opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:2028, in PreTrainedTokenizerBase.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, *init_inputs, **kwargs)
2026 logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
-> 2028 return cls._from_pretrained(
2029 resolved_vocab_files,
2030 pretrained_model_name_or_path,
2031 init_configuration,
2032 *init_inputs,
2033 token=token,
2034 cache_dir=cache_dir,
2035 local_files_only=local_files_only,
2036 _commit_hash=commit_hash,
2037 _is_local=is_local,
2038 **kwargs,
2039 )
File /opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:2260, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, token, cache_dir, local_files_only, _commit_hash, _is_local, *init_inputs, **kwargs)
2259 try:
-> 2260 tokenizer = cls(*init_inputs, **init_kwargs)
2261 except OSError:
File ~/.cache/huggingface/modules/transformers_modules/moss-moon-003-sft-plugin/tokenization_moss.py:149, in MossTokenizer.__init__(self, vocab_file, merges_file, errors, unk_token, bos_token, eos_token, pad_token, add_prefix_space, add_bos_token, **kwargs)
148 pad_token = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token
--> 149 super().__init__(
150 errors=errors,
151 unk_token=unk_token,
152 bos_token=bos_token,
153 eos_token=eos_token,
154 pad_token=pad_token,
155 add_prefix_space=add_prefix_space,
156 add_bos_token=add_bos_token,
157 **kwargs,
158 )
159 self.add_bos_token = add_bos_token
File /opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils.py:367, in PreTrainedTokenizer.__init__(self, **kwargs)
365 # 4. If some of the special tokens are not part of the vocab, we add them, at the end.
366 # the order of addition is the same as self.SPECIAL_TOKENS_ATTRIBUTES following tokenizers
--> 367 self._add_tokens(
368 [token for token in self.all_special_tokens_extended if token not in self._added_tokens_encoder],
369 special_tokens=True,
370 )
372 self._decode_use_source_tokenizer = False
File /opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils.py:467, in PreTrainedTokenizer._add_tokens(self, new_tokens, special_tokens)
466 # TODO this is fairly slow to improve!
--> 467 current_vocab = self.get_vocab().copy()
468 new_idx = len(current_vocab) # only call this once, len gives the last index + 1
File ~/.cache/huggingface/modules/transformers_modules/moss-moon-003-sft-plugin/tokenization_moss.py:182, in MossTokenizer.get_vocab(self)
181 def get_vocab(self):
--> 182 return dict(self.encoder, **self.added_tokens_encoder)
AttributeError: 'MossTokenizer' object has no attribute 'encoder'
```
I later found that moss-moon-003-sft hits the same error. Does anyone know what is going on here?
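In case it helps narrow this down, here is a quick sketch for collecting version info from the failing environments (nothing model-specific; just the packages involved in the call path above, plus the GPU name):

```python
# Dump the versions of the packages involved in the failing call path,
# plus the GPU name, so the two failing environments can be compared.
from importlib.metadata import version

import torch

for pkg in ("modelscope", "transformers", "torch"):
    print(f"{pkg}: {version(pkg)}")

print("GPU:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "none")
```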