@@ -15,7 +15,7 @@ enum class TokenType {
1515};
1616
1717constexpr std::pair<const char *, TokenType> kTokenizerDict [] = {
18- {" PreTrainedTokenizerFast " , TokenType::kBPE },
18+ {" PreTrainedTokenizer " , TokenType::kBPE },
1919 {" CLIPTokenizer" , TokenType::kBPE },
2020 {" WhisperTokenizer" , TokenType::kBPE },
2121 {" GemmaTokenizer" , TokenType::kBPE },
@@ -256,10 +256,16 @@ class TokenJsonConfig final {
256256 }
257257
258258 static TokenType GetTokenType (const std::string& tok) {
259- static const std::unordered_map<std::string , TokenType> dict {
259+ static const std::unordered_map<std::string_view , TokenType> dict {
260260 std::begin (kTokenizerDict ), std::end (kTokenizerDict ) };
261261
262- auto iter = dict.find (tok);
262+ std::string_view tok_class (tok);
263+ auto pos = tok_class.find (" Fast" );
264+ if (pos != std::string_view::npos && pos + 4 == tok_class.size ()) {
265+ tok_class.remove_suffix (4 );
266+ }
267+
268+ auto iter = dict.find (tok_class);
263269 return iter == dict.end () ? TokenType::kUnknown : iter->second ;
264270 }
265271
0 commit comments