-
Notifications
You must be signed in to change notification settings - Fork 238
Qwen3-VL support #3988
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Qwen3-VL support #3988
Changes from 4 commits
1d9d0a7
3721f50
2f930ed
94203b0
beb8a3f
180e53d
36f522f
7e94c18
21ab2fd
2ef6bb1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -25,6 +25,7 @@ | |
| #include <vector> | ||
|
|
||
| #include <openvino/runtime/tensor.hpp> | ||
| #include <openvino/genai/json_container.hpp> | ||
| #include <openvino/genai/tokenizer.hpp> | ||
|
|
||
| #include "src/port/rapidjson_document.hpp" | ||
|
|
@@ -78,6 +79,8 @@ struct OpenAIChatCompletionsRequest { | |
| std::optional<std::string> responseFormat{std::nullopt}; | ||
| // Map that holds tool names and schemas for their arguments | ||
| ToolsSchemas_t toolNameSchemaMap; | ||
| // Full tools payload in JSON form for passing directly to tokenizer chat template. | ||
| std::optional<ov::genai::JsonContainer> tools{std::nullopt}; | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is that a full copy? We will have duplicated tools in the HTTP payload content with that change, right?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, actually three copies: one in the request body (string), a second in rapidjson format (toolNameSchemaMap), and now a third in ov::genai::JsonContainer format. We need this to be able to provide it to GenAI's apply_chat_template. |
||
| // Holds value for tool_choice field as described in https://platform.openai.com/docs/api-reference/chat/create#chat_create-tool_choice | ||
| std::string toolChoice; | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -81,7 +81,9 @@ absl::Status VisualLanguageModelLegacyServable::parseRequest(std::shared_ptr<Gen | |
| legacyExecutionContext->apiHandler = std::make_shared<OpenAIChatCompletionsHandler>(*legacyExecutionContext->payload.parsedJson, | ||
| legacyExecutionContext->endpoint, | ||
| std::chrono::system_clock::now(), | ||
| getProperties()->tokenizer); | ||
| getProperties()->tokenizer, | ||
| getProperties()->toolParserName, | ||
| getProperties()->reasoningParserName); | ||
| auto& config = ovms::Config::instance(); | ||
|
|
||
| auto status = executionContext->apiHandler->parseRequest(getProperties()->maxTokensLimit, getProperties()->bestOfLimit, getProperties()->maxModelLength, config.getServerSettings().allowedLocalMediaPath, config.getServerSettings().allowedMediaDomains); | ||
|
|
@@ -101,7 +103,12 @@ absl::Status VisualLanguageModelLegacyServable::parseRequest(std::shared_ptr<Gen | |
| } | ||
| return ov::genai::StreamingStatus::RUNNING; | ||
| }; | ||
| legacyExecutionContext->textStreamer = std::make_shared<ov::genai::TextStreamer>(getProperties()->tokenizer, callback); | ||
| ov::AnyMap streamerConfig; | ||
| if (legacyExecutionContext->apiHandler->getOutputParser() != nullptr && | ||
| (legacyExecutionContext->apiHandler->getOutputParser()->requiresStreamingWithSpecialTokens())) { | ||
| streamerConfig.insert(ov::genai::skip_special_tokens(false)); | ||
| } | ||
| legacyExecutionContext->textStreamer = std::make_shared<ov::genai::TextStreamer>(getProperties()->tokenizer, callback, streamerConfig); | ||
| } | ||
| legacyExecutionContext->generationConfigBuilder = std::make_shared<GenerationConfigBuilder>(getProperties()->baseGenerationConfig, | ||
| getProperties()->toolParserName, | ||
|
|
@@ -222,6 +229,7 @@ absl::Status VisualLanguageModelLegacyServable::preparePartialResponse(std::shar | |
| return absl::OkStatus(); | ||
| } | ||
|
|
||
| // Legacy VLM | ||
dkalinowski marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| absl::Status VisualLanguageModelLegacyServable::prepareInputs(std::shared_ptr<GenAiServableExecutionContext>& executionContext) { | ||
| auto vlmExecutionContext = std::static_pointer_cast<VisualLanguageModelLegacyServableExecutionContext>(executionContext); | ||
| if (vlmExecutionContext->apiHandler == nullptr) { | ||
|
|
@@ -252,7 +260,12 @@ absl::Status VisualLanguageModelLegacyServable::prepareInputs(std::shared_ptr<Ge | |
| } | ||
|
|
||
| constexpr bool add_generation_prompt = true; // confirm it should be hardcoded | ||
| vlmExecutionContext->inputText = properties->tokenizer.apply_chat_template(chatHistory, add_generation_prompt); | ||
| const auto& tools = vlmExecutionContext->apiHandler->getTools(); | ||
| if (tools.has_value()) { | ||
| vlmExecutionContext->inputText = properties->tokenizer.apply_chat_template(chatHistory, add_generation_prompt, {}, tools); | ||
| } else { | ||
| vlmExecutionContext->inputText = properties->tokenizer.apply_chat_template(chatHistory, add_generation_prompt, {}); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What's the empty-brackets argument? We didn't need it before; what does it stand for?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is the custom chat template. If an empty string is provided, the internal one is used. |
||
| } | ||
| } else { | ||
| return absl::InvalidArgumentError("Unsupported endpoint"); | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How does it work? Why do we have such a condition only for uint64?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is because genai::JsonContainer has no support for uint64, only int64.