Skip to content

Commit d527ea6

Browse files
authored
added sdpa llm inference for qwen3.5 model support (#107)
* added modeling code from the openvino-new-arch repo. This is the code base dependency for supporting the qwen3.5 model. Signed-off-by: Zhang, Xiaolin <xiaolin.zhang@intel.com> * added sdpa llm inference for qwen3.5 model support. Signed-off-by: Zhang, Xiaolin <xiaolin.zhang@intel.com> * added a unit test for LLMInferenceSDPAModule for Qwen3.5 Signed-off-by: Zhang, Xiaolin <xiaolin.zhang@intel.com> * added compiler flag -DENABLE_OPENVINO_NEW_ARCH to control build behavior to support both public openvino runtime and new arch for qwen3.5 support Signed-off-by: Zhang, Xiaolin <xiaolin.zhang@intel.com> --------- Signed-off-by: Zhang, Xiaolin <xiaolin.zhang@intel.com>
1 parent ccb92b4 commit d527ea6

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+10728
-3
lines changed

cmake/features.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ option(ENABLE_TOOLS "Enable tools build" ON)
1111
option(ENABLE_GGUF "Enable support for GGUF format" ON)
1212
option(ENABLE_XGRAMMAR "Enable support for structured output generation with xgrammar backend" ON)
1313
option(ENABLE_DYNAMIC_WEIGHT_MANAGEMENT "Enable offloading model weights (load/release)" OFF)
14+
option(ENABLE_OPENVINO_NEW_ARCH "Enable OpenVINO new architecture for QWen3.5 etc models support" OFF)
1415

1516
# Disable building samples for NPM package
1617
if(CPACK_GENERATOR STREQUAL "NPM")

samples/cpp/module_genai/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,4 +46,4 @@ if(POLICY CMP0135)
4646
cmake_policy(SET CMP0135 NEW)
4747
endif()
4848

49-
add_subdirectory(comfyui)
49+
add_subdirectory(comfyui)

src/cpp/CMakeLists.txt

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
# SPDX-License-Identifier: Apache-2.0
33
#
44

5-
file(GLOB_RECURSE SOURCE_FILES CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/*.c")
5+
file(GLOB_RECURSE SOURCE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/*.c")
6+
67
list(APPEND SOURCE_FILES "${CMAKE_CURRENT_BINARY_DIR}/version.cpp")
78

89
# Dependencies
@@ -131,6 +132,12 @@ set(TARGET_NAME_OBJ ${TARGET_NAME}_obj)
131132

132133
add_library(${TARGET_NAME_OBJ} OBJECT ${SOURCE_FILES})
133134

135+
# When ENABLE_OPENVINO_NEW_ARCH is ON, add every .cpp under src/modeling/ to the
# object library and define ENABLE_OPENVINO_NEW_ARCH (PRIVATE) for its sources.
# NOTE(review): the recursive glob that fills SOURCE_FILES matches src/*.cpp, which
# appears to include src/modeling/*.cpp as well. If so, these files are compiled even
# when the option is OFF and are listed twice when it is ON - confirm, and consider
# filtering them out of SOURCE_FILES (e.g. list(FILTER ... EXCLUDE REGEX ...)).
if(ENABLE_OPENVINO_NEW_ARCH)
136+
file(GLOB_RECURSE MODELING_SOURCES CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/modeling/*.cpp")
137+
target_sources(${TARGET_NAME_OBJ} PRIVATE ${MODELING_SOURCES})
138+
# PRIVATE: the definition is visible only while compiling this target's own sources.
target_compile_definitions(${TARGET_NAME_OBJ} PRIVATE ENABLE_OPENVINO_NEW_ARCH)
139+
endif()
140+
134141
target_include_directories(${TARGET_NAME_OBJ}
135142
PUBLIC "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
136143
"$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>"
@@ -154,6 +161,12 @@ target_include_directories(${TARGET_NAME_OBJ} SYSTEM PRIVATE "${safetensors.h_SO
154161

155162
target_link_libraries(${TARGET_NAME_OBJ} PRIVATE openvino::runtime openvino::threading nlohmann_json::nlohmann_json minja ${YAML_CPP_TARGET} PRIVATE TBB::tbb)
156163

164+
# modeling/ sources use internal OpenVINO dev-api headers (e.g. openvino/op/linear_attn.hpp)
165+
# openvino::core::dev exposes src/core/dev_api which contains these headers.
166+
if(TARGET openvino::core::dev)
167+
target_link_libraries(${TARGET_NAME_OBJ} PRIVATE openvino::core::dev)
168+
endif()
169+
157170
target_compile_features(${TARGET_NAME_OBJ} PRIVATE cxx_std_17)
158171

159172
target_compile_definitions(${TARGET_NAME_OBJ} PRIVATE openvino_genai_EXPORTS)
@@ -190,7 +203,7 @@ endif()
190203

191204
target_compile_features(${TARGET_NAME} INTERFACE cxx_std_17)
192205

193-
# Add /bigobj flag for MSVC to handle large object files
206+
# Add /bigobj flag for MSVC to handle large object files
194207
if(MSVC)
195208
target_compile_options(${TARGET_NAME} PRIVATE "/bigobj")
196209
endif()
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// Copyright (C) 2023-2025 Intel Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
#pragma once
5+
6+
#include <string>
7+
#include <unordered_map>
8+
9+
#include <openvino/openvino.hpp>
10+
11+
#include "gguf_utils/gguf.hpp"
12+
#include "modeling/ops/context.hpp"
13+
#include "modeling/weights/weight_finalizer.hpp"
14+
15+
namespace ov {
16+
namespace genai {
17+
namespace gguf {
18+
19+
// WeightFinalizer implementation living in the ov::genai::gguf namespace.
// Only the declaration is visible here; behavioral notes below that depend on
// the out-of-line definitions are marked as assumptions.
class GGUFWeightFinalizer : public ov::genai::modeling::weights::WeightFinalizer {
20+
public:
21+
// Constructs the finalizer from two string-keyed maps: `consts` (ov::Tensor values)
// and `qtypes` (gguf_tensor_type values).
// NOTE(review): the members consts_/qtypes_ are references; presumably they are
// bound to these arguments, in which case both maps must outlive this object
// and must not be temporaries - confirm against the constructor definition.
GGUFWeightFinalizer(const std::unordered_map<std::string, ov::Tensor>& consts,
22+
const std::unordered_map<std::string, gguf_tensor_type>& qtypes);
23+
24+
// Overrides WeightFinalizer::finalize. Takes the weight `name`, a mutable
// WeightSource and a mutable OpContext, and returns a FinalizedWeight by value.
ov::genai::modeling::weights::FinalizedWeight finalize(const std::string& name,
25+
ov::genai::modeling::weights::WeightSource& source,
26+
ov::genai::modeling::OpContext& ctx) override;
27+
28+
private:
29+
// Returns a gguf_tensor_type for `base_key`; presumably a lookup in qtypes_ - TODO confirm.
gguf_tensor_type resolve_qtype(const std::string& base_key) const;
30+
// Derives a "base key" string from a weight name; the exact rule lives in the .cpp.
std::string base_key_from_name(const std::string& name) const;
31+
32+
// Non-owning references (not copies) to externally owned maps.
const std::unordered_map<std::string, ov::Tensor>& consts_;
33+
const std::unordered_map<std::string, gguf_tensor_type>& qtypes_;
34+
// String-keyed map of graph outputs; presumably memoizes already finalized
// weights so repeated requests reuse the same node - TODO confirm.
std::unordered_map<std::string, ov::Output<ov::Node>> cache_;
35+
};
36+
37+
} // namespace gguf
38+
} // namespace genai
39+
} // namespace ov
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Copyright (C) 2023-2025 Intel Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
#pragma once
5+
6+
#include <string>
7+
#include <unordered_map>
8+
#include <vector>
9+
10+
#include <openvino/openvino.hpp>
11+
12+
#include "modeling/weights/weight_source.hpp"
13+
14+
namespace ov {
15+
namespace genai {
16+
namespace gguf {
17+
18+
// WeightSource implementation living in the ov::genai::gguf namespace.
// Only the declaration is visible here; behavioral notes below that depend on
// the out-of-line definitions are marked as assumptions.
class GGUFWeightSource : public ov::genai::modeling::weights::WeightSource {
19+
public:
20+
// Constructs the source from a string-keyed map of ov::Tensor. Marked explicit,
// so a map is never implicitly converted into a GGUFWeightSource.
// NOTE(review): the member consts_ is a reference; presumably it is bound to this
// argument, in which case the map must outlive this object - confirm in the .cpp.
explicit GGUFWeightSource(const std::unordered_map<std::string, ov::Tensor>& consts);
21+
22+
// Overrides WeightSource::keys; returns the list of names by value.
std::vector<std::string> keys() const override;
23+
// Overrides WeightSource::has; reports whether `name` is available.
bool has(const std::string& name) const override;
24+
// Overrides WeightSource::get_tensor; returns a const reference to a tensor,
// so the result is only valid while the referenced tensor is alive.
// NOTE(review): behavior for an unknown `name` is not visible here - confirm.
const ov::Tensor& get_tensor(const std::string& name) const override;
25+
26+
private:
27+
// Non-owning reference (not a copy) to an externally owned tensor map.
const std::unordered_map<std::string, ov::Tensor>& consts_;
28+
std::vector<std::string> keys_; // Canonical names
29+
std::unordered_map<std::string, std::string> canonical_to_gguf_; // canonical -> gguf mapping
30+
};
31+
32+
} // namespace gguf
33+
} // namespace genai
34+
} // namespace ov

0 commit comments

Comments
 (0)