Skip to content

Commit 9bf1250

Browse files
committed
unary
1 parent e42011a commit 9bf1250

File tree

2 files changed

+41
-32
lines changed

src/llm/language_model/legacy/servable.cpp

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -83,26 +83,31 @@ absl::Status LegacyServable::parseRequest(std::shared_ptr<GenAiServableExecution
8383

8484
if (legacyExecutionContext->apiHandler->isStream()) {
8585
legacyExecutionContext->lastStreamerCallbackOutput = ""; // initialize with empty string
86-
auto callback = [& executionInProgress = legacyExecutionContext->executionInProgress, &mutex = legacyExecutionContext->mutex, &lastStreamerCallbackOutput = legacyExecutionContext->lastStreamerCallbackOutput, &clientDisconnected = legacyExecutionContext->clientDisconnected](std::string text) {
87-
SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Streamer callback executed with text: [{}]", text);
88-
if (clientDisconnected.load()) {
89-
executionInProgress.notify_one();
90-
return ov::genai::StreamingStatus::CANCEL;
91-
}
92-
{
93-
std::lock_guard<std::mutex> lock(mutex);
94-
lastStreamerCallbackOutput += text;
95-
executionInProgress.notify_one();
96-
}
97-
return ov::genai::StreamingStatus::RUNNING;
98-
};
99-
ov::AnyMap streamerConfig;
100-
if (legacyExecutionContext->apiHandler->getOutputParser() != nullptr &&
101-
(legacyExecutionContext->apiHandler->getOutputParser()->requiresStreamingWithSpecialTokens())) {
102-
streamerConfig.insert(ov::genai::skip_special_tokens(false));
86+
}
87+
auto callback = [& executionInProgress = legacyExecutionContext->executionInProgress,
88+
&mutex = legacyExecutionContext->mutex,
89+
&lastStreamerCallbackOutput = legacyExecutionContext->lastStreamerCallbackOutput,
90+
&clientDisconnected = legacyExecutionContext->clientDisconnected,
91+
streamMode = legacyExecutionContext->apiHandler->isStream()](std::string text) {
92+
SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Streamer callback executed with text: [{}]", text);
93+
if (clientDisconnected.load()) {
94+
executionInProgress.notify_one();
95+
return ov::genai::StreamingStatus::CANCEL;
96+
}
97+
if (streamMode) {
98+
std::lock_guard<std::mutex> lock(mutex);
99+
lastStreamerCallbackOutput += text;
100+
executionInProgress.notify_one();
103101
}
104-
legacyExecutionContext->textStreamer = std::make_shared<ov::genai::TextStreamer>(getProperties()->tokenizer, callback, streamerConfig);
102+
return ov::genai::StreamingStatus::RUNNING;
103+
};
104+
ov::AnyMap streamerConfig;
105+
if (legacyExecutionContext->apiHandler->isStream() &&
106+
legacyExecutionContext->apiHandler->getOutputParser() != nullptr &&
107+
(legacyExecutionContext->apiHandler->getOutputParser()->requiresStreamingWithSpecialTokens())) {
108+
streamerConfig.insert(ov::genai::skip_special_tokens(false));
105109
}
110+
legacyExecutionContext->textStreamer = std::make_shared<ov::genai::TextStreamer>(getProperties()->tokenizer, callback, streamerConfig);
106111
legacyExecutionContext->generationConfigBuilder = std::make_shared<GenerationConfigBuilder>(getProperties()->baseGenerationConfig,
107112
getProperties()->toolParserName,
108113
getProperties()->enableToolGuidedGeneration,

src/llm/visual_language_model/legacy/servable.cpp

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -92,21 +92,25 @@ absl::Status VisualLanguageModelLegacyServable::parseRequest(std::shared_ptr<Gen
9292

9393
if (legacyExecutionContext->apiHandler->isStream()) {
9494
legacyExecutionContext->lastStreamerCallbackOutput = ""; // initialize with empty string
95-
auto callback = [& executionInProgress = legacyExecutionContext->executionInProgress, &mutex = legacyExecutionContext->mutex, &lastStreamerCallbackOutput = legacyExecutionContext->lastStreamerCallbackOutput, &clientDisconnected = legacyExecutionContext->clientDisconnected](std::string text) {
96-
SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Streamer callback executed with text: [{}]", text);
97-
if (clientDisconnected.load()) {
98-
executionInProgress.notify_one();
99-
return ov::genai::StreamingStatus::CANCEL;
100-
}
101-
{
102-
std::lock_guard<std::mutex> lock(mutex);
103-
lastStreamerCallbackOutput += text;
104-
executionInProgress.notify_one();
105-
}
106-
return ov::genai::StreamingStatus::RUNNING;
107-
};
108-
legacyExecutionContext->textStreamer = std::make_shared<ov::genai::TextStreamer>(getProperties()->tokenizer, callback);
10995
}
96+
auto callback = [& executionInProgress = legacyExecutionContext->executionInProgress,
97+
&mutex = legacyExecutionContext->mutex,
98+
&lastStreamerCallbackOutput = legacyExecutionContext->lastStreamerCallbackOutput,
99+
&clientDisconnected = legacyExecutionContext->clientDisconnected,
100+
streamMode = legacyExecutionContext->apiHandler->isStream()](std::string text) {
101+
SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Streamer callback executed with text: [{}]", text);
102+
if (clientDisconnected.load()) {
103+
executionInProgress.notify_one();
104+
return ov::genai::StreamingStatus::CANCEL;
105+
}
106+
if (streamMode) {
107+
std::lock_guard<std::mutex> lock(mutex);
108+
lastStreamerCallbackOutput += text;
109+
executionInProgress.notify_one();
110+
}
111+
return ov::genai::StreamingStatus::RUNNING;
112+
};
113+
legacyExecutionContext->textStreamer = std::make_shared<ov::genai::TextStreamer>(getProperties()->tokenizer, callback);
110114
legacyExecutionContext->generationConfigBuilder = std::make_shared<GenerationConfigBuilder>(getProperties()->baseGenerationConfig,
111115
getProperties()->toolParserName,
112116
getProperties()->enableToolGuidedGeneration,

0 commit comments

Comments (0)