Skip to content

Commit c67485a

Browse files
authored
Merge pull request #634 from chenghuaWang/wch-main
feat(mllm_kernel): add initial implementation of mllm-kernel with CPU and JIT utilities
2 parents d1615d2 + 1cc8a33 commit c67485a

36 files changed

Lines changed: 2973 additions & 7 deletions

.codespellrc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[codespell]
2+
ignore-words-list = ans, als, hel, boostrap, childs, te, vas, hsa, ment, cann, thi, makro, wil, rouge, PRIS, bfloat, constexpr, cuda, dlpack, expt, forceinline, ifndef, linalg, LPBQ, mllm, pymllm, Quantizaton, Qwen, ROCM, silu, torchao
3+
skip = *.json,*.jsonl,*.patch,*.txt

README-ZH.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,10 +74,10 @@ mllm 框架可以与主流社区框架的模型检查点无缝集成。通过 ml
7474

7575
### mllm v2
7676

77-
| Model(v1) | CPU | Hexagon NPU <br> INT8 |
77+
| Model(v2) | CPU | Hexagon NPU <br> INT8 |
7878
|-----------------------------------------------------------------------------|------|-----------------------|
7979
| [Qwen3-0.6B](https://github.com/QwenLM/Qwen3) | [✔️ w4a8](https://www.modelscope.cn/models/mllmTeam/Qwen3-0.6B-w4a32kai) | |
80-
| [Qwen3-1.7B](https://github.com/QwenLM/Qwen3) | [✔️ w4a8](https://www.modelscope.cn/models/mllmTeam/Qwen3-1.7B-w4a8-i8mm-kai) | |
80+
| [Qwen3-1.7B](https://github.com/QwenLM/Qwen3) | [✔️ w4a8](https://www.modelscope.cn/models/mllmTeam/Qwen3-1.7B-w4a8-i8mm-kai) | [W4A16-SM8650](https://modelscope.cn/models/mllmTeam/Qwen3-1.7B-Qnn-AOT-SM8650/summary) |
8181
| [DeepSeek-OCR](https://github.com/deepseek-ai/DeepSeek-OCR) | [✔️ w4a8](https://www.modelscope.cn/models/mllmTeam/DeepSeek-OCR-w4a8-i8mm-kai) | |
8282
| [SmolLM3](https://huggingface.co/blog/smollm3)| [✔️ w4a8](https://www.modelscope.cn/models/mllmTeam/SmolLM3-3B-w4a8-i8mm-kai) | |
8383
| [Qwen2-VL-2B-Instruct](https://qwenlm.github.io/zh/blog/qwen2-vl/)|[✔️ w4a8](https://www.modelscope.cn/models/mllmTeam/Qwen2-VL-2B-Instruct-w4a32kai) ||

README.md

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ The mllm framework integrates seamlessly with popular community frameworks' chec
7575
| Model(v2) | CPU | Hexagon NPU <br> INT8 |
7676
|-----------------------------------------------------------------------------|------|-----------------------|
7777
| [Qwen3-0.6B](https://github.com/QwenLM/Qwen3) | [✔️ w4a8](https://www.modelscope.cn/models/mllmTeam/Qwen3-0.6B-w4a32kai) | |
78-
| [Qwen3-1.7B](https://github.com/QwenLM/Qwen3) | [✔️ w4a8](https://www.modelscope.cn/models/mllmTeam/Qwen3-1.7B-w4a8-i8mm-kai) | |
78+
| [Qwen3-1.7B](https://github.com/QwenLM/Qwen3) | [✔️ w4a8](https://www.modelscope.cn/models/mllmTeam/Qwen3-1.7B-w4a8-i8mm-kai) | [W4A16-SM8650](https://modelscope.cn/models/mllmTeam/Qwen3-1.7B-Qnn-AOT-SM8650/) |
7979
| [DeepSeek-OCR](https://github.com/deepseek-ai/DeepSeek-OCR) | [✔️ w4a8](https://www.modelscope.cn/models/mllmTeam/DeepSeek-OCR-w4a8-i8mm-kai) | |
8080
| [SmolLM3](https://huggingface.co/blog/smollm3)| [✔️ w4a8](https://www.modelscope.cn/models/mllmTeam/SmolLM3-3B-w4a8-i8mm-kai) | |
8181
| [Qwen2-VL-2B-Instruct](https://qwenlm.github.io/zh/blog/qwen2-vl/)|[✔️ w4a8](https://www.modelscope.cn/models/mllmTeam/Qwen2-VL-2B-Instruct-w4a32kai) ||
@@ -125,8 +125,10 @@ The mllm framework integrates seamlessly with popular community frameworks' chec
125125
| :---: | :---: | :---: | :---: | :---: |
126126
| PC-X86-w/oAVX512 | Ubuntu 22.04 | ![build-passing](https://img.shields.io/badge/build-passing-green) | - | - |
127127
| Nvidia A40 | Ubuntu 22.04 | - | ![build-passing](https://img.shields.io/badge/build-passing-green) | - |
128-
| Xiaomi14-8Elite | Android 15 | ![build-passing](https://img.shields.io/badge/build-passing-green) | - | ![build-pending](https://img.shields.io/badge/build-pending-gray) |
129-
| OnePlus13-8Elite | Android 15 | ![build-passing](https://img.shields.io/badge/build-passing-green) | - | ![build-pending](https://img.shields.io/badge/build-pending-gray) |
128+
| Nvidia RTX Pro 6000 | Ubuntu 22.04 | - | ![build-passing](https://img.shields.io/badge/build-passing-green) | - |
129+
| Nvidia H20 | Ubuntu 22.04 | - | ![build-passing](https://img.shields.io/badge/build-passing-green) | - |
130+
| Xiaomi14-8Elite | Android 15 | ![build-passing](https://img.shields.io/badge/build-passing-green) | - | ![build-passing](https://img.shields.io/badge/build-passing-green) |
131+
| OnePlus13-8Elite | Android 15 | ![build-passing](https://img.shields.io/badge/build-passing-green) | - | ![build-passing](https://img.shields.io/badge/build-passing-green) |
130132
| MacMini-M4 | MacOS 15.5 | ![build-passing](https://img.shields.io/badge/build-passing-green) | - | - |
131133
| OrangePi AI Pro(310B) | Ubuntu 22.04 | - | - | ![build-pending](https://img.shields.io/badge/build-pending-gray) |
132134
| OrangePi AI Studio(310P) | Ubuntu 22.04 | - | - | ![build-pending](https://img.shields.io/badge/build-pending-gray) |

examples/qwen3_qnn_aot/config_1.7B.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,5 +28,5 @@
2828
"use_sliding_window": false,
2929
"vocab_size": 151936,
3030
"max_cache_length": 2048,
31-
"linear_impl_type": "QNN_LPBQ_w4a16o16_G32"
31+
"linear_impl_type": "QNN_LPBQ_w4a16o16_G16"
3232
}

mllm-kernel/.clang-format

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
Language: Cpp
2+
AccessModifierOffset: -1
3+
AlignAfterOpenBracket: Align
4+
AlignConsecutiveAssignments: false
5+
AlignConsecutiveDeclarations: false
6+
AlignEscapedNewlinesLeft: true
7+
AlignOperands: true
8+
AlignTrailingComments: true
9+
AllowAllParametersOfDeclarationOnNextLine: true
10+
AllowShortBlocksOnASingleLine: true
11+
AllowShortCaseLabelsOnASingleLine: true
12+
AllowShortFunctionsOnASingleLine: All
13+
AllowShortIfStatementsOnASingleLine: true
14+
AllowShortLoopsOnASingleLine: true
15+
AlwaysBreakAfterDefinitionReturnType: None
16+
AlwaysBreakAfterReturnType: None
17+
AlwaysBreakBeforeMultilineStrings: false
18+
AlwaysBreakTemplateDeclarations: true
19+
BinPackArguments: true
20+
BinPackParameters: true
21+
BraceWrapping:
22+
AfterClass: true
23+
AfterControlStatement: false
24+
AfterEnum: false
25+
AfterFunction: false
26+
AfterNamespace: false
27+
AfterObjCDeclaration: false
28+
AfterStruct: false
29+
AfterUnion: false
30+
BeforeCatch: false
31+
BeforeElse: false
32+
IndentBraces: false
33+
BreakBeforeBinaryOperators: NonAssignment
34+
BreakBeforeBraces: Attach
35+
BreakBeforeTernaryOperators: true
36+
BreakConstructorInitializersBeforeComma: false
37+
BreakAfterJavaFieldAnnotations: false
38+
BreakStringLiterals: true
39+
ColumnLimit: 128
40+
CommentPragmas: "^ IWYU pragma:"
41+
BreakBeforeInheritanceComma: false
42+
ConstructorInitializerAllOnOneLineOrOnePerLine: true
43+
ConstructorInitializerIndentWidth: 4
44+
ContinuationIndentWidth: 4
45+
Cpp11BracedListStyle: true
46+
DisableFormat: false
47+
ExperimentalAutoDetectBinPacking: false
48+
FixNamespaceComments: true
49+
ForEachMacros: [foreach, Q_FOREACH, BOOST_FOREACH]
50+
IncludeCategories:
51+
- Regex: '^<.*\.h>'
52+
Priority: 1
53+
- Regex: "^<.*"
54+
Priority: 2
55+
- Regex: ".*"
56+
Priority: 3
57+
IncludeIsMainRegex: "([-_](test|unittest))?$"
58+
IndentCaseLabels: true
59+
IndentWidth: 2
60+
IndentWrappedFunctionNames: false
61+
JavaScriptQuotes: Leave
62+
JavaScriptWrapImports: true
63+
KeepEmptyLinesAtTheStartOfBlocks: false
64+
MacroBlockBegin: ""
65+
MacroBlockEnd: ""
66+
MaxEmptyLinesToKeep: 1
67+
NamespaceIndentation: None
68+
ObjCBlockIndentWidth: 2
69+
ObjCSpaceAfterProperty: false
70+
ObjCSpaceBeforeProtocolList: false
71+
PenaltyBreakBeforeFirstCallParameter: 1
72+
PenaltyBreakComment: 300
73+
PenaltyBreakFirstLessLess: 120
74+
PenaltyBreakString: 1000
75+
PenaltyExcessCharacter: 1000000
76+
PenaltyReturnTypeOnItsOwnLine: 200
77+
PointerAlignment: Left
78+
ReflowComments: true
79+
SortIncludes: false
80+
SpaceAfterCStyleCast: false
81+
SpaceAfterTemplateKeyword: false
82+
SpaceBeforeAssignmentOperators: true
83+
SpaceBeforeParens: ControlStatements
84+
SpaceInEmptyParentheses: false
85+
SpacesBeforeTrailingComments: 2
86+
SpacesInAngles: false
87+
SpacesInContainerLiterals: true
88+
SpacesInCStyleCastParentheses: false
89+
SpacesInParentheses: false
90+
SpacesInSquareBrackets: false
91+
Standard: Auto
92+
TabWidth: 8
93+
UseTab: Never

mllm-kernel/.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
build/
2+
build-py/
3+
.vscode/settings.json
4+
compile_commands.json
5+
.clangd

mllm-kernel/CMakeLists.txt

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
cmake_minimum_required(VERSION 3.21)
project(mllm_kernel VERSION 1.0.0 LANGUAGES CXX C)

# Language standards: C++20 is mandatory for host code, and the CUDA standard
# default is set to 20 as well.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_STANDARD 20)

# Build everything as position-independent code and emit compile_commands.json.
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Backend switches; only the CPU backend is enabled by default.
option(MLLM_KERNEL_BUILD_CPU "Build CPU kernels with Highway SIMD" ON)
option(MLLM_KERNEL_BUILD_CUDA "Build CUDA kernels" OFF)
option(MLLM_KERNEL_BUILD_ASCEND "Build Ascend kernels" OFF)

# CPM.cmake supplies CPMAddPackage(), used below for package management.
include(cmake/CPM.cmake)
22+
# ============================================================================
# Highway SIMD Library (for CPU kernels)
# ============================================================================
if(MLLM_KERNEL_BUILD_CPU)
  # Highway's tests, examples and contrib modules are not needed here, so these
  # Highway-specific switches are pinned to OFF in the cache to speed up the
  # build.
  set(HWY_ENABLE_TESTS OFF CACHE BOOL "Disable Highway tests" FORCE)
  set(HWY_ENABLE_EXAMPLES OFF CACHE BOOL "Disable Highway examples" FORCE)
  set(HWY_ENABLE_CONTRIB OFF CACHE BOOL "Disable Highway contrib" FORCE)

  # BUILD_TESTING is a global, CTest-owned switch shared by every project in
  # the build tree. Only default it to OFF when mllm-kernel is the top-level
  # project, and never FORCE it: forcing would silently disable the tests of a
  # project that embeds mllm-kernel and discard a -DBUILD_TESTING=ON given on
  # the command line.
  if(PROJECT_IS_TOP_LEVEL)
    set(BUILD_TESTING OFF CACHE BOOL "Disable testing")
  endif()

  CPMAddPackage(
    NAME highway
    GITHUB_REPOSITORY google/highway
    GIT_TAG 1.3.0
    OPTIONS
      "HWY_ENABLE_TESTS OFF"
      "HWY_ENABLE_EXAMPLES OFF"
      "HWY_ENABLE_CONTRIB OFF"
  )

  # highway_ADDED is only true when CPM added the package to this build.
  if(highway_ADDED)
    message(STATUS "Highway SIMD library added from: ${highway_SOURCE_DIR}")

    # Create an interface library to expose Highway includes
    add_library(mllm_kernel_highway INTERFACE)
    # Namespaced alias matching the name the installed export set gives this
    # target (NAMESPACE mllm_kernel::), so build-tree and installed consumers
    # can link the same identifier.
    add_library(mllm_kernel::mllm_kernel_highway ALIAS mllm_kernel_highway)

    target_link_libraries(mllm_kernel_highway INTERFACE hwy)
    target_include_directories(mllm_kernel_highway INTERFACE
      $<BUILD_INTERFACE:${highway_SOURCE_DIR}>
      $<INSTALL_INTERFACE:include>
    )
  endif()
endif()
54+
55+
# ============================================================================
# Installation
# ============================================================================

# Highway is installed alongside the kernels so its headers are available for
# JIT compilation.
if(MLLM_KERNEL_BUILD_CPU AND highway_ADDED)
  # Copy the hwy/ directory, keeping only *.h and *.inc files.
  install(DIRECTORY ${highway_SOURCE_DIR}/hwy
          DESTINATION include
          FILES_MATCHING PATTERN "*.h" PATTERN "*.inc")

  # The Highway library itself, registered in the MllmKernelTargets export set.
  install(TARGETS hwy
          EXPORT MllmKernelTargets
          LIBRARY DESTINATION lib
          ARCHIVE DESTINATION lib
          RUNTIME DESTINATION bin)

  # The interface wrapper target joins the same export set.
  install(TARGETS mllm_kernel_highway EXPORT MllmKernelTargets)
endif()

# CPU kernel headers: the trailing slash copies the directory's contents
# straight into include/ rather than into a nested subdirectory.
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/mllm_kernel/cpu/include/
        DESTINATION include
        FILES_MATCHING PATTERN "*.h" PATTERN "*.hpp")

# CPU kernel sources, shipped for JIT compilation.
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/mllm_kernel/cpu/csrc/
        DESTINATION share/mllm_kernel/cpu/csrc
        FILES_MATCHING PATTERN "*.cpp" PATTERN "*.hpp" PATTERN "*.h")
104+
105+
# Install the export set as MllmKernelTargets.cmake; every exported target is
# prefixed with the mllm_kernel:: namespace.
install(EXPORT MllmKernelTargets
        NAMESPACE mllm_kernel::
        FILE MllmKernelTargets.cmake
        DESTINATION lib/cmake/mllm_kernel)

# Generate the package config and version files in the build tree.
include(CMakePackageConfigHelpers)

# Version file: compatible with any request that shares the major version.
write_basic_package_version_file(
  ${CMAKE_CURRENT_BINARY_DIR}/MllmKernelConfigVersion.cmake
  VERSION ${PROJECT_VERSION}
  COMPATIBILITY SameMajorVersion)

# Config file, expanded from the template under cmake/.
configure_package_config_file(
  ${CMAKE_CURRENT_SOURCE_DIR}/cmake/MllmKernelConfig.cmake.in
  ${CMAKE_CURRENT_BINARY_DIR}/MllmKernelConfig.cmake
  INSTALL_DESTINATION lib/cmake/mllm_kernel)

# Install both generated files next to the exported targets file.
install(FILES
          ${CMAKE_CURRENT_BINARY_DIR}/MllmKernelConfig.cmake
          ${CMAKE_CURRENT_BINARY_DIR}/MllmKernelConfigVersion.cmake
        DESTINATION lib/cmake/mllm_kernel)
133+
134+
# Configure-time report: project version, enabled backends and, when Highway
# was added to this build, its version and source location.
message(STATUS "")
message(STATUS "=== mllm-kernel Configuration ===")
message(STATUS "  Version: ${PROJECT_VERSION}")

# One line per backend switch.
message(STATUS "  CPU kernels: ${MLLM_KERNEL_BUILD_CPU}")
message(STATUS "  CUDA kernels: ${MLLM_KERNEL_BUILD_CUDA}")
message(STATUS "  Ascend kernels: ${MLLM_KERNEL_BUILD_ASCEND}")

# Highway details are shown only for a CPU build that added the package.
if(MLLM_KERNEL_BUILD_CPU AND highway_ADDED)
  message(STATUS "  Highway version: 1.3.0")
  message(STATUS "  Highway source: ${highway_SOURCE_DIR}")
endif()

message(STATUS "=================================")
message(STATUS "")

0 commit comments

Comments
 (0)