moss-moon-003-sft Jupyter test fails #376

Open
@YoloZyk

Description

The moss-moon-003-sft-plugin model published on ModelScope fails to deploy with the provided Jupyter test example. It fails both on my own server and on a ModelScope-provided instance; the error is `AttributeError: 'MossTokenizer' object has no attribute 'encoder'`. The GPU is an A40, though I'm not sure whether that matters.
```
AttributeError                            Traceback (most recent call last)
File /opt/conda/lib/python3.10/site-packages/modelscope/utils/registry.py:212, in build_from_cfg(cfg, registry, group_key, default_args)
    211 else:
--> 212     return obj_cls(**args)
    213 except Exception as e:
    214     # Normal TypeError does not print class name.

File ~/.cache/modelscope/modelscope_modules/moss-moon-003-sft-plugin/ms_wrapper.py:21, in mossmoon003sftpluginTextGenerationPipeline.__init__(self, model, *args, **kwargs)
     16 def __init__(
     17         self,
     18         model: Union[Model, str],
     19         *args,
     20         **kwargs):
---> 21     model = mossmoon003sftpluginTextGeneration(model) if isinstance(model, str) else model
     22     super().__init__(model=model, **kwargs)

File ~/.cache/modelscope/modelscope_modules/moss-moon-003-sft-plugin/ms_wrapper.py:43, in mossmoon003sftpluginTextGeneration.__init__(self, model_dir, *args, **kwargs)
     42 # loading tokenizer
---> 43 self.tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
     44 self.model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True).half()

File /opt/conda/lib/python3.10/site-packages/transformers/models/auto/tokenization_auto.py:774, in AutoTokenizer.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
    773     tokenizer_class.register_for_auto_class()
--> 774     return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
    775 elif config_tokenizer_class is not None:

File /opt/conda/lib/python3.10/site-packages/modelscope/utils/hf_util.py:52, in patch_tokenizer_base.<locals>.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
     51 model_dir = pretrained_model_name_or_path
---> 52 return ori_from_pretrained(cls, model_dir, *model_args, **kwargs)

File /opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:2028, in PreTrainedTokenizerBase.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, *init_inputs, **kwargs)
   2026     logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
-> 2028 return cls._from_pretrained(
   2029     resolved_vocab_files,
   2030     pretrained_model_name_or_path,
   2031     init_configuration,
   2032     *init_inputs,
   2033     token=token,
   2034     cache_dir=cache_dir,
   2035     local_files_only=local_files_only,
   2036     _commit_hash=commit_hash,
   2037     _is_local=is_local,
   2038     **kwargs,
   2039 )

File /opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:2260, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, token, cache_dir, local_files_only, _commit_hash, _is_local, *init_inputs, **kwargs)
   2259 try:
-> 2260     tokenizer = cls(*init_inputs, **init_kwargs)
   2261 except OSError:

File ~/.cache/huggingface/modules/transformers_modules/moss-moon-003-sft-plugin/tokenization_moss.py:149, in MossTokenizer.__init__(self, vocab_file, merges_file, errors, unk_token, bos_token, eos_token, pad_token, add_prefix_space, add_bos_token, **kwargs)
    148 pad_token = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token
--> 149 super().__init__(
    150     errors=errors,
    151     unk_token=unk_token,
    152     bos_token=bos_token,
    153     eos_token=eos_token,
    154     pad_token=pad_token,
    155     add_prefix_space=add_prefix_space,
    156     add_bos_token=add_bos_token,
    157     **kwargs,
    158 )
    159 self.add_bos_token = add_bos_token

File /opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils.py:367, in PreTrainedTokenizer.__init__(self, **kwargs)
    365 # 4. If some of the special tokens are not part of the vocab, we add them, at the end.
    366 # the order of addition is the same as self.SPECIAL_TOKENS_ATTRIBUTES following tokenizers
--> 367 self._add_tokens(
    368     [token for token in self.all_special_tokens_extended if token not in self._added_tokens_encoder],
    369     special_tokens=True,
    370 )
    372 self._decode_use_source_tokenizer = False

File /opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils.py:467, in PreTrainedTokenizer._add_tokens(self, new_tokens, special_tokens)
    466 # TODO this is fairly slow to improve!
--> 467 current_vocab = self.get_vocab().copy()
    468 new_idx = len(current_vocab)  # only call this once, len gives the last index + 1

File ~/.cache/huggingface/modules/transformers_modules/moss-moon-003-sft-plugin/tokenization_moss.py:182, in MossTokenizer.get_vocab(self)
    181 def get_vocab(self):
--> 182     return dict(self.encoder, **self.added_tokens_encoder)

AttributeError: 'MossTokenizer' object has no attribute 'encoder'
```

I later found that moss-moon-003-sft has the same problem. Does anyone know what's going on?
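
Reading the traceback, this looks like an init-order problem between the custom tokenizer code and the installed transformers version: the frames match the slow-tokenizer refactor (around transformers 4.34), where `PreTrainedTokenizer.__init__` already calls `self._add_tokens(...)`, which calls `self.get_vocab()`. `MossTokenizer.__init__` calls `super().__init__(...)` first and only loads the vocab into `self.encoder` afterwards, so `get_vocab()` runs before `self.encoder` exists. A minimal sketch of the same pattern, independent of MOSS (the `BrokenTokenizer` class below is hypothetical, written only to reproduce the error):

```python
# Hypothetical minimal reproduction of the init-order bug in the traceback.
# In transformers >= 4.34 (assumed from the traceback's line numbers),
# PreTrainedTokenizer.__init__ calls _add_tokens(), which calls
# self.get_vocab(); a slow tokenizer that sets its vocab attributes only
# AFTER super().__init__() fails exactly like MossTokenizer does.
from transformers import PreTrainedTokenizer


class BrokenTokenizer(PreTrainedTokenizer):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)   # get_vocab() already runs in here...
        self.encoder = {"hello": 0}  # ...but the vocab is only set afterwards

    def get_vocab(self):
        # Same shape as MossTokenizer.get_vocab
        return dict(self.encoder, **self.added_tokens_encoder)


try:
    BrokenTokenizer(unk_token="<unk>")
except AttributeError as e:
    print(e)  # 'BrokenTokenizer' object has no attribute 'encoder'
```

If that is the cause, two workarounds seem plausible: pin transformers to a release from before the refactor, e.g. `pip install "transformers<4.34"` (the exact compatible version is an assumption; check what the MOSS repo pins in its requirements), or edit the downloaded `tokenization_moss.py` so the vocab and merges files are loaded into `self.encoder`/`self.decoder` before `super().__init__()` is called.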
