@@ -83,26 +83,31 @@ absl::Status LegacyServable::parseRequest(std::shared_ptr<GenAiServableExecution
8383
8484 if (legacyExecutionContext->apiHandler ->isStream ()) {
8585 legacyExecutionContext->lastStreamerCallbackOutput = " " ; // initialize with empty string
86- auto callback = [& executionInProgress = legacyExecutionContext->executionInProgress , &mutex = legacyExecutionContext->mutex , &lastStreamerCallbackOutput = legacyExecutionContext->lastStreamerCallbackOutput , &clientDisconnected = legacyExecutionContext->clientDisconnected ](std::string text) {
87- SPDLOG_LOGGER_TRACE (llm_calculator_logger, " Streamer callback executed with text: [{}]" , text);
88- if (clientDisconnected.load ()) {
89- executionInProgress.notify_one ();
90- return ov::genai::StreamingStatus::CANCEL;
91- }
92- {
93- std::lock_guard<std::mutex> lock (mutex);
94- lastStreamerCallbackOutput += text;
95- executionInProgress.notify_one ();
96- }
97- return ov::genai::StreamingStatus::RUNNING;
98- };
99- ov::AnyMap streamerConfig;
100- if (legacyExecutionContext->apiHandler ->getOutputParser () != nullptr &&
101- (legacyExecutionContext->apiHandler ->getOutputParser ()->requiresStreamingWithSpecialTokens ())) {
102- streamerConfig.insert (ov::genai::skip_special_tokens (false ));
86+ }
87+ auto callback = [& executionInProgress = legacyExecutionContext->executionInProgress ,
88+ &mutex = legacyExecutionContext->mutex ,
89+ &lastStreamerCallbackOutput = legacyExecutionContext->lastStreamerCallbackOutput ,
90+ &clientDisconnected = legacyExecutionContext->clientDisconnected ,
91+ streamMode = legacyExecutionContext->apiHandler ->isStream ()](std::string text) {
92+ SPDLOG_LOGGER_TRACE (llm_calculator_logger, " Streamer callback executed with text: [{}]" , text);
93+ if (clientDisconnected.load ()) {
94+ executionInProgress.notify_one ();
95+ return ov::genai::StreamingStatus::CANCEL;
96+ }
97+ if (streamMode) {
98+ std::lock_guard<std::mutex> lock (mutex);
99+ lastStreamerCallbackOutput += text;
100+ executionInProgress.notify_one ();
103101 }
104- legacyExecutionContext->textStreamer = std::make_shared<ov::genai::TextStreamer>(getProperties ()->tokenizer , callback, streamerConfig);
102+ return ov::genai::StreamingStatus::RUNNING;
103+ };
104+ ov::AnyMap streamerConfig;
105+ if (legacyExecutionContext->apiHandler ->isStream () &&
106+ legacyExecutionContext->apiHandler ->getOutputParser () != nullptr &&
107+ (legacyExecutionContext->apiHandler ->getOutputParser ()->requiresStreamingWithSpecialTokens ())) {
108+ streamerConfig.insert (ov::genai::skip_special_tokens (false ));
105109 }
110+ legacyExecutionContext->textStreamer = std::make_shared<ov::genai::TextStreamer>(getProperties ()->tokenizer , callback, streamerConfig);
106111 legacyExecutionContext->generationConfigBuilder = std::make_shared<GenerationConfigBuilder>(getProperties ()->baseGenerationConfig ,
107112 getProperties ()->toolParserName ,
108113 getProperties ()->enableToolGuidedGeneration ,
0 commit comments