Skip to content

Commit 15219e2

Browse files
authored
turn on neural_speed by default (microsoft#19627)
### Description <!-- Describe your changes. --> The crash caused by neural_speed turns out to be a very rare corner case, so neural_speed is now turned on by default. ### Motivation and Context <!-- - Why is this change required? What problem does it solve? - If it fixes an open issue, please link to the issue here. -->
1 parent 6b305f9 commit 15219e2

File tree

7 files changed

+38
-6
lines changed

7 files changed

+38
-6
lines changed

cgmanifests/generated/cgmanifest.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@
206206
"component": {
207207
"type": "git",
208208
"git": {
209-
"commitHash": "c11386eb632eec7c1c2aa323142f73519f946e2a",
209+
"commitHash": "150e7527d5286ddd3a995c228dedf8d76a7a86bc",
210210
"repositoryUrl": "https://github.com/intel/neural-speed.git"
211211
},
212212
"comments": "neural_speed"

cmake/CMakeLists.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ option(onnxruntime_USE_QNN "Build with QNN support" OFF)
8888
option(onnxruntime_USE_SNPE "Build with SNPE support" OFF)
8989
option(onnxruntime_USE_RKNPU "Build with RKNPU support" OFF)
9090
option(onnxruntime_USE_DNNL "Build with DNNL support" OFF)
91-
option(onnxruntime_USE_NEURAL_SPEED "Build with Neural Speed support" OFF)
91+
option(onnxruntime_USE_NEURAL_SPEED "Build with Neural Speed support" ON)
9292
option(onnxruntime_USE_JSEP "Build with JavaScript implemented kernels support" OFF)
9393
option(onnxruntime_BUILD_UNIT_TESTS "Build ONNXRuntime unit tests" ON)
9494
option(onnxruntime_BUILD_CSHARP "Build C# library" OFF)
@@ -1206,7 +1206,7 @@ if (onnxruntime_USE_DNNL)
12061206
add_compile_definitions(DNNL_OPENMP)
12071207
endif()
12081208

1209-
if (onnxruntime_USE_NEURAL_SPEED AND NOT onnxruntime_MINIMAL_BUILD)
1209+
if (onnxruntime_USE_NEURAL_SPEED AND NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_USE_TVM)
12101210
include(neural_speed)
12111211
if (USE_NEURAL_SPEED)
12121212
list(APPEND onnxruntime_EXTERNAL_LIBRARIES neural_speed::bestla)

cmake/deps.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ microsoft_gsl;https://github.com/microsoft/GSL/archive/refs/tags/v4.0.0.zip;cf36
3535
microsoft_wil;https://github.com/microsoft/wil/archive/refs/tags/v1.0.230629.1.zip;e4a542a323c070376f7c2d1973d0f7ddbc1d2fa5
3636
mimalloc;https://github.com/microsoft/mimalloc/archive/refs/tags/v2.1.1.zip;d5ee7d34223d0567892db5179849939c8769dc41
3737
mp11;https://github.com/boostorg/mp11/archive/refs/tags/boost-1.82.0.zip;9bc9e01dffb64d9e0773b2e44d2f22c51aace063
38-
neural_speed;https://github.com/intel/neural-speed/archive/refs/tags/bestlav0.1.1.zip;65b0f7a0d04f72f0d5a8d48af70f0366f2ab3939
38+
neural_speed;https://github.com/intel/neural-speed/archive/refs/tags/v0.3.zip;5ec64e3071edc7347ebd8a81679cf06e2bb9b851
3939
onnx;https://github.com/onnx/onnx/archive/refs/tags/v1.15.0.zip;54c3f960a0541c5d8d3e60c2933e11f5d3688a11
4040
#use the commit of Final DDS removal. DDS output is now supported by ORT TRT.
4141
onnx_tensorrt;https://github.com/onnx/onnx-tensorrt/archive/bacfaaa951653cd4e72efe727a543567cb38f7de.zip;26434329612e804164ab7baa6ae629ada56c1b26

cmake/external/neural_speed.cmake

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ if(USE_NEURAL_SPEED)
99
neural_speed
1010
URL ${DEP_URL_neural_speed}
1111
URL_HASH SHA1=${DEP_SHA1_neural_speed}
12+
PATCH_COMMAND ${Patch_EXECUTABLE} -p1 < ${PROJECT_SOURCE_DIR}/patches/neural_speed/150e7527d5286ddd3a995c228dedf8d76a7a86bc.patch
1213
)
1314
set(BTLA_USE_OPENMP OFF)
1415
onnxruntime_fetchcontent_makeavailable(neural_speed)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
diff --git a/bestla/bestla/bestla_prologue_b.h b/bestla/bestla/bestla_prologue_b.h
2+
index 99f3ccc..a11de9d 100644
3+
--- a/bestla/bestla/bestla_prologue_b.h
4+
+++ b/bestla/bestla/bestla_prologue_b.h
5+
@@ -456,9 +456,8 @@ class WeightKBlockNInteger {
6+
auto tmpscales = tmp;
7+
auto tmpzeropoints = reinterpret_cast<int8_t*>(tmpscales + N * blks);
8+
if (scales) {
9+
- for (size_t i = 0; i < N * blks; i += 2) {
10+
+ for (size_t i = 0; i < N * blks; i ++) {
11+
tmpscales[i] = scales[i] / 16;
12+
- tmpscales[i + 1] = scales[i + 1] / 16;
13+
}
14+
}
15+
if (zero_points) {
16+
diff --git a/bestla/bestla/kernel_avx512f.h b/bestla/bestla/kernel_avx512f.h
17+
index 6783ee8..59822e5 100644
18+
--- a/bestla/bestla/kernel_avx512f.h
19+
+++ b/bestla/bestla/kernel_avx512f.h
20+
@@ -673,8 +673,8 @@ inline BTLA_CODE decompress_kblock_s3_s8fp(utils::bit2x4* bit2ptr, utils::bit1x8
21+
zmm1 = _mm512_sllv_epi32(zmm1, zmm_shift); // int3_clip => int8
22+
zmm2 = _mm512_sllv_epi32(zmm2, zmm_shift); // int3_clip => int8
23+
24+
- _mm512_storeu_epi8((__m512i*)dst, zmm1);
25+
- _mm512_storeu_epi8((__m512i*)(dst + 64), zmm2);
26+
+ _mm512_storeu_si512((__m512i*)dst, zmm1);
27+
+ _mm512_storeu_si512((__m512i*)(dst + 64), zmm2);
28+
};
29+
30+
assert(head_ignore_num % 8 == 0);

onnxruntime/contrib_ops/cpu/quantization/neural_speed_wrapper.h

+1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#pragma warning(disable : 4244)
2828
#pragma warning(disable : 4267)
2929
#pragma warning(disable : 4702)
30+
#pragma warning(disable : 4127)
3031
#endif
3132

3233
#include "bestla/bestla_prologue_a.h"

tools/ci_build/github/azure-pipelines/templates/download-deps.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ steps:
1111
packageType: upack
1212
feed: '/7424c8e4-5c62-490e-95c4-79446f31017c'
1313
definition: '517c4f6f-5437-4392-a70d-4f15ec5be2f0'
14-
version: 1.0.143
14+
version: 1.0.145
1515
downloadPath: $(Build.BinariesDirectory)/deps
1616

1717
# The private ADO project
@@ -22,7 +22,7 @@ steps:
2222
packageType: upack
2323
feed: '/4c7631f5-24c0-4307-8822-1aa8f180c325'
2424
definition: 'fd9dd5ad-b73e-4678-890e-edcf680dbc1a'
25-
version: 1.0.143
25+
version: 1.0.145
2626
downloadPath: $(Build.BinariesDirectory)/deps
2727

2828
# You can add more ADO accounts at here.

0 commit comments

Comments
 (0)