Skip to content

Commit 98009a7

Browse files
committed
Calling a script to convert the model from C++ source code is not allowed. (#130)
Remove those calls. Signed-off-by: xipingya <xiping.yan@intel.com>
1 parent 7ff5055 commit 98009a7

File tree

1 file changed

+0
-46
lines changed

1 file changed

+0
-46
lines changed

src/cpp/src/tokenizer/tokenizer_impl.cpp

Lines changed: 0 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -6,9 +6,6 @@
66
#include "sampling/structured_output/structured_output_controller.hpp"
77
#include "openvino/genai/version.hpp"
88

9-
#include <iostream>
10-
#include <algorithm>
11-
129
namespace ov {
1310
namespace genai {
1411

@@ -334,49 +331,6 @@ void Tokenizer::TokenizerImpl::setup_tokenizer(const std::filesystem::path& mode
334331
setup_tokenizer(std::make_pair(ov_tokenizer, ov_detokenizer), filtered_properties);
335332
return;
336333
}
337-
338-
// Check if we need to convert a HuggingFace tokenizer.
339-
// Support both tokenizer.json (modern) and vocab.json+merges.txt (GPT2-style)
340-
bool has_tokenizer_json = std::filesystem::exists(models_path / "tokenizer.json");
341-
bool has_gpt2_tokenizer = std::filesystem::exists(models_path / "vocab.json") &&
342-
std::filesystem::exists(models_path / "merges.txt");
343-
bool has_hf_tokenizer = has_tokenizer_json || has_gpt2_tokenizer;
344-
bool has_ov_tokenizer = std::filesystem::exists(models_path / "openvino_tokenizer.xml");
345-
bool has_ov_detokenizer = std::filesystem::exists(models_path / "openvino_detokenizer.xml");
346-
bool needs_tokenizer_conversion = has_hf_tokenizer && (!has_ov_tokenizer || !has_ov_detokenizer);
347-
348-
if (needs_tokenizer_conversion) {
349-
// Convert HuggingFace tokenizer to OpenVINO format
350-
std::cout << "[Tokenizer] Converting HuggingFace tokenizer to OpenVINO format..." << std::endl;
351-
352-
std::string model_dir_str = models_path.string();
353-
std::string tokenizer_path_str = (models_path / "openvino_tokenizer.xml").string();
354-
std::string detokenizer_path_str = (models_path / "openvino_detokenizer.xml").string();
355-
356-
// Replace backslashes with forward slashes for Python compatibility
357-
std::replace(model_dir_str.begin(), model_dir_str.end(), '\\', '/');
358-
std::replace(tokenizer_path_str.begin(), tokenizer_path_str.end(), '\\', '/');
359-
std::replace(detokenizer_path_str.begin(), detokenizer_path_str.end(), '\\', '/');
360-
361-
std::string python_cmd =
362-
"python -c \""
363-
"from transformers import AutoTokenizer; "
364-
"from openvino_tokenizers import convert_tokenizer; "
365-
"from openvino import save_model; "
366-
"t = AutoTokenizer.from_pretrained('" + model_dir_str + "'); "
367-
"tok, detok = convert_tokenizer(t, with_detokenizer=True); "
368-
"save_model(tok, '" + tokenizer_path_str + "'); "
369-
"save_model(detok, '" + detokenizer_path_str + "'); "
370-
"print('Tokenizer conversion successful')\"";
371-
372-
int result = std::system(python_cmd.c_str());
373-
if (result != 0) {
374-
std::cerr << "[Tokenizer] Warning: Tokenizer conversion failed" << std::endl;
375-
} else {
376-
std::cout << "[Tokenizer] Tokenizer conversion completed successfully" << std::endl;
377-
}
378-
}
379-
380334
if (std::filesystem::exists(models_path / "openvino_tokenizer.xml")) {
381335
ov_tokenizer = core.read_model(models_path / "openvino_tokenizer.xml", {}, filtered_properties);
382336
}

0 commit comments

Comments
 (0)