diff --git a/src/cpp/src/tokenizer/tokenizer_impl.cpp b/src/cpp/src/tokenizer/tokenizer_impl.cpp
index 01894d5a3c..20e9fba596 100644
--- a/src/cpp/src/tokenizer/tokenizer_impl.cpp
+++ b/src/cpp/src/tokenizer/tokenizer_impl.cpp
@@ -423,8 +423,32 @@ void Tokenizer::TokenizerImpl::setup_tokenizer(const std::pair<std::shared_ptr<ov::Model>, std::shared_ptr<ov::Model>>& models, const ov::AnyMap& properties) {
         m_ireq_queue_tokenizer = std::make_unique<CircularBufferQueue<ov::InferRequest>>(
             infer_request_queue_size,
             [&compiled_tokenizer]() -> ov::InferRequest {
                 return compiled_tokenizer.create_infer_request();
             });
+
+        // Warm up one tokenizer request asynchronously so the first user call does not pay the first-inference cost.
+        auto idx = m_ireq_queue_tokenizer->get_idle().get();
+        auto& req = m_ireq_queue_tokenizer->get(idx);
+
+        // TODO CVS-150630: Empty strings sporadically can fail, therefore use nonempty string for warmup.
+        // shared_ptr to keep input data alive until async request is finished
+        auto warmup_text = std::make_shared<std::string>("non empty string");
+        auto warmup_tensor = ov::Tensor(ov::element::string, ov::Shape{1}, warmup_text.get());
+
+        req.set_input_tensor(0, warmup_tensor);
+        if (is_paired_input) {
+            // Set to an empty tensor to avoid errors.
+            // The subgraph within the ov::Model will handle this scenario, ensuring the output remains correct.
+            req.set_input_tensor(1, ov::Tensor{ov::element::string, {0}});
+        }
+
+        req.set_callback([queue = m_ireq_queue_tokenizer.get(), idx, warmup_text, &req](std::exception_ptr) {
+            // this empty placeholder keeps input data alive until request is finished
+            (void) warmup_text;
+            queue->return_to(idx);
+            // Drop the callback so it (and the captured warmup data) are released after the warm-up finishes.
+            req.set_callback({});
+        });
+        req.start_async();
     }
 
     if (ov_detokenizer) {
@@ -447,8 +471,28 @@ void Tokenizer::TokenizerImpl::setup_tokenizer(const std::pair<std::shared_ptr<ov::Model>, std::shared_ptr<ov::Model>>& models, const ov::AnyMap& properties) {
         m_ireq_queue_detokenizer = std::make_unique<CircularBufferQueue<ov::InferRequest>>(
             infer_request_queue_size,
             [&compiled_detokenizer]() -> ov::InferRequest {
                 return compiled_detokenizer.create_infer_request();
             });
+
+        // Warm up one detokenizer request asynchronously with a small token sequence.
+        auto idx = m_ireq_queue_detokenizer->get_idle().get();
+        auto& req = m_ireq_queue_detokenizer->get(idx);
+
+        // shared_ptr to keep input data alive until async request is finished
+        auto warmup_tokens = std::make_shared<std::vector<int64_t>>(
+            std::initializer_list<int64_t>{1, 33, 199, 42, 42}
+        );
+
+        auto warmup_tensor = ov::Tensor(ov::element::i64, ov::Shape{1, warmup_tokens->size()}, warmup_tokens->data());
+        req.set_input_tensor(0, warmup_tensor);
+
+        req.set_callback([queue = m_ireq_queue_detokenizer.get(), idx, warmup_tokens, &req](std::exception_ptr) {
+            // this empty placeholder keeps input data alive until request is finished
+            (void) warmup_tokens;
+            queue->return_to(idx);
+            req.set_callback({});
+        });
+        req.start_async();
+
         m_vocab = read_vocab_from_detokenizer_model(ov_detokenizer);
     }