Skip to content

Commit 5486534

Browse files
committed
Merge remote-tracking branch 'upstream/main'
2 parents 12bc8df + 1461a16 commit 5486534

File tree

141 files changed

+5722
-3308
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

141 files changed

+5722
-3308
lines changed

cgmanifests/generated/cgmanifest.json

+4-4
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@
136136
"component": {
137137
"type": "git",
138138
"git": {
139-
"commitHash": "309b75c9e56e0a674bf78d59872ce131f814dfb6",
139+
"commitHash": "fe98e0b93565382648129271381c14d6205255e3",
140140
"repositoryUrl": "https://github.com/google/XNNPACK.git"
141141
},
142142
"comments": "googlexnnpack"
@@ -226,8 +226,8 @@
226226
"component": {
227227
"type": "git",
228228
"git": {
229-
"commitHash": "4fe0e1e183925bf8cfa6aae24237e724a96479b8",
230-
"repositoryUrl": "https://github.com/Maratyszcza/pthreadpool.git"
229+
"commitHash": "4e80ca24521aa0fb3a746f9ea9c3eaa20e9afbb0",
230+
"repositoryUrl": "https://github.com/google/pthreadpool.git"
231231
},
232232
"comments": "pthreadpool"
233233
}
@@ -246,7 +246,7 @@
246246
"component": {
247247
"type": "git",
248248
"git": {
249-
"commitHash": "ca678952a9a8eaa6de112d154e8e104b22f9ab3f",
249+
"commitHash": "8a1772a0c5c447df2d18edf33ec4603a8c9c04a6",
250250
"repositoryUrl": "https://github.com/pytorch/cpuinfo.git"
251251
},
252252
"comments": "pytorch_cpuinfo"

cmake/CMakeLists.txt

+4
Original file line numberDiff line numberDiff line change
@@ -680,6 +680,7 @@ else()
680680
check_cxx_compiler_flag(-Wdeprecated-builtins HAS_DEPRECATED_BUILTINS)
681681
check_cxx_compiler_flag(-Wdeprecated-copy HAS_DEPRECATED_COPY)
682682
check_cxx_compiler_flag(-Wdeprecated-declarations HAS_DEPRECATED_DECLARATIONS)
683+
check_cxx_compiler_flag(-Wdeprecated-literal-operator HAS_DEPRECATED_LITERAL_OPERATOR)
683684
check_cxx_compiler_flag(-Wdeprecated-this-capture HAS_DEPRECATED_THIS_CAPTURE)
684685
check_cxx_compiler_flag(-Wenum-constexpr-conversion HAS_ENUM_CONSTEXPR_CONVERSION)
685686
check_cxx_compiler_flag(-Wformat-truncation HAS_FORMAT_TRUNCATION)
@@ -736,6 +737,9 @@ else()
736737
if (HAS_DEPRECATED_BUILTINS)
737738
list(APPEND ORT_WARNING_FLAGS -Wno-deprecated-builtins)
738739
endif()
740+
if (HAS_DEPRECATED_LITERAL_OPERATOR)
741+
list(APPEND ORT_WARNING_FLAGS -Wno-deprecated-literal-operator)
742+
endif()
739743
#see:https://reviews.llvm.org/D131307
740744
#It was intended that the 'enum-constexpr-conversion' type warnings can not be silenced by -w
741745
if(HAS_ENUM_CONSTEXPR_CONVERSION AND NOT Protobuf_FOUND)

cmake/deps.txt

+4-4
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ fxdiv;https://github.com/Maratyszcza/FXdiv/archive/63058eff77e11aa15bf531df5dd34
2929
google_benchmark;https://github.com/google/benchmark/archive/refs/tags/v1.8.5.zip;cd47d3d272faf353600c8cc2fdec2b52d6f69177
3030
googletest;https://github.com/google/googletest/archive/refs/tags/v1.15.0.zip;9d2d0af8d77ac726ea55d44a8fa727ec98311349
3131
#xnnpack 2024.09.04
32-
googlexnnpack;https://github.com/google/XNNPACK/archive/309b75c9e56e0a674bf78d59872ce131f814dfb6.zip;39FA5259EAEACE0547284B63D5CEDC4F05553F5A
32+
googlexnnpack;https://github.com/google/XNNPACK/archive/fe98e0b93565382648129271381c14d6205255e3.zip;14f61dcf17cec2cde34ba2dcf61d6f24bf6059f3
3333
json;https://github.com/nlohmann/json/archive/refs/tags/v3.10.5.zip;f257f8dc27c5b8c085dc887b40cddd18ae1f725c
3434
microsoft_gsl;https://github.com/microsoft/GSL/archive/refs/tags/v4.0.0.zip;cf368104cd22a87b4dd0c80228919bb2df3e2a14
3535
microsoft_wil;https://github.com/microsoft/wil/archive/refs/tags/v1.0.230629.1.zip;e4a542a323c070376f7c2d1973d0f7ddbc1d2fa5
@@ -46,9 +46,9 @@ protoc_linux_x86;https://github.com/protocolbuffers/protobuf/releases/download/v
4646
protoc_linux_aarch64;https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-linux-aarch_64.zip;df9d45470b0b8cf939dd2f0ec6b88e9cafc4d617
4747
protoc_mac_universal;https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-osx-universal_binary.zip;23710c3d1c2036d8d65a6a22234372fa2d7af9ef
4848
psimd;https://github.com/Maratyszcza/psimd/archive/072586a71b55b7f8c584153d223e95687148a900.zip;1f5454b01f06f9656b77e4a5e2e31d7422487013
49-
pthreadpool;https://github.com/Maratyszcza/pthreadpool/archive/4fe0e1e183925bf8cfa6aae24237e724a96479b8.zip;07a0aa91dd9bf86f31b95497e00f31d8a261a4bd
49+
pthreadpool;https://github.com/google/pthreadpool/archive/4e80ca24521aa0fb3a746f9ea9c3eaa20e9afbb0.zip;bd4ea65c8292801e9555b527a0ecbb2e0092c917
5050
pybind11;https://github.com/pybind/pybind11/archive/refs/tags/v2.13.1.zip;9255d5c8568debcc329dd42ed8f410ee139ac7b1
51-
pytorch_cpuinfo;https://github.com/pytorch/cpuinfo/archive/ca678952a9a8eaa6de112d154e8e104b22f9ab3f.zip;138bf57d2a110935330d1048dce6d7b82d17d377
51+
pytorch_cpuinfo;https://github.com/pytorch/cpuinfo/archive/8a1772a0c5c447df2d18edf33ec4603a8c9c04a6.zip;85bf8a60dae026b99b6ccd78606c85ed83bfb2cd
5252
re2;https://github.com/google/re2/archive/refs/tags/2024-07-02.zip;646e1728269cde7fcef990bf4a8e87b047882e88
5353
safeint;https://github.com/dcleblanc/SafeInt/archive/refs/tags/3.0.28.zip;23f252040ff6cb9f1fd18575b32fa8fb5928daac
5454
tensorboard;https://github.com/tensorflow/tensorboard/archive/373eb09e4c5d2b3cc2493f0949dc4be6b6a45e81.zip;67b833913605a4f3f499894ab11528a702c2b381
@@ -59,4 +59,4 @@ composable_kernel;https://github.com/ROCmSoftwarePlatform/composable_kernel/arch
5959
directx_headers;https://github.com/microsoft/DirectX-Headers/archive/refs/tags/v1.613.1.zip;47653509a3371eabb156360f42faf582f314bf2e
6060
cudnn_frontend;https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v1.7.0.zip;d0753d8d5b39947ca0729d7773cb84653a129eb1
6161
dawn;https://github.com/google/dawn/archive/12a3b24c456cebd9fd11f23ac0164f78129b00c6.zip;ad428f6dc16f1336d584f7bad5714e1097dafc43
62-
kleidiai;https://gitlab.arm.com/kleidi/kleidiai/-/archive/v0.2.0/kleidiai-v0.2.0.zip;B1E3173992FD91F20DB904AB77D6E901778C2681
62+
kleidiai;https://gitlab.arm.com/kleidi/kleidiai/-/archive/d15722976120710080ca098fe8ddabf4556cb40f/kleidiai-d15722976120710080ca098fe8ddabf4556cb40f.zip;d6c840d00c3b05aedf06e957ddaece1013d1f40b

cmake/external/xnnpack.cmake

+1-40
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
set(XNNPACK_USE_SYSTEM_LIBS ON CACHE INTERNAL "")
22
set(XNNPACK_BUILD_TESTS OFF CACHE INTERNAL "")
33
set(XNNPACK_BUILD_BENCHMARKS OFF CACHE INTERNAL "")
4-
set(FP16_BUILD_TESTS OFF CACHE INTERNAL "")
5-
set(FP16_BUILD_BENCHMARKS OFF CACHE INTERNAL "")
4+
65
set(PTHREADPOOL_BUILD_TESTS OFF CACHE INTERNAL "")
76
set(PTHREADPOOL_BUILD_BENCHMARKS OFF CACHE INTERNAL "")
87
set(KLEIDIAI_BUILD_TESTS OFF CACHE INTERNAL "")
@@ -17,44 +16,6 @@ if(CMAKE_ANDROID_ARCH_ABI STREQUAL armeabi-v7a)
1716
set(XNNPACK_ENABLE_ARM_BF16 OFF)
1817
endif()
1918

20-
# fp16 depends on psimd
21-
FetchContent_Declare(psimd URL ${DEP_URL_psimd} URL_HASH SHA1=${DEP_SHA1_psimd})
22-
onnxruntime_fetchcontent_makeavailable(psimd)
23-
set(PSIMD_SOURCE_DIR ${psimd_SOURCE_DIR})
24-
25-
block(PROPAGATE fp16_PATCH_COMMAND)
26-
# only apply fp16 patch for Apple x86_64 targets
27-
28-
if(APPLE)
29-
if(NOT "${CMAKE_OSX_ARCHITECTURES}" STREQUAL "")
30-
if ("x86_64" IN_LIST CMAKE_OSX_ARCHITECTURES)
31-
set(fp16_PATCH_REQUIRED 1)
32-
endif()
33-
else()
34-
# CMAKE_OSX_ARCHITECTURES unspecified, check host
35-
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
36-
set(fp16_PATCH_REQUIRED 1)
37-
endif()
38-
endif()
39-
endif()
40-
41-
if(fp16_PATCH_REQUIRED)
42-
message(STATUS "Applying fp16 patch.")
43-
set(fp16_PATCH_FILE ${PROJECT_SOURCE_DIR}/patches/fp16/remove_math_h_dependency_from_fp16_h.patch)
44-
set(fp16_PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${fp16_PATCH_FILE})
45-
else()
46-
set(fp16_PATCH_COMMAND "")
47-
endif()
48-
endblock()
49-
50-
FetchContent_Declare(
51-
fp16
52-
URL ${DEP_URL_fp16}
53-
URL_HASH SHA1=${DEP_SHA1_fp16}
54-
PATCH_COMMAND ${fp16_PATCH_COMMAND}
55-
)
56-
onnxruntime_fetchcontent_makeavailable(fp16)
57-
5819
# pthreadpool depends on fxdiv
5920
FetchContent_Declare(fxdiv URL ${DEP_URL_fxdiv} URL_HASH SHA1=${DEP_SHA1_fxdiv})
6021
onnxruntime_fetchcontent_makeavailable(fxdiv)

cmake/onnxruntime_config.h.in

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#cmakedefine HAS_CLASS_MEMACCESS
1010
#cmakedefine HAS_DEPRECATED_COPY
1111
#cmakedefine HAS_DEPRECATED_DECLARATIONS
12+
#cmakedefine HAS_DEPRECATED_LITERAL_OPERATOR
1213
#cmakedefine HAS_DEPRECATED_THIS_CAPTURE
1314
#cmakedefine HAS_FORMAT_TRUNCATION
1415
#cmakedefine HAS_IGNORED_ATTRIBUTES

cmake/onnxruntime_providers_coreml.cmake

+1-1
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ endif()
177177
if (_enable_ML_PROGRAM)
178178
# Set up coremltools fp16 and json dependencies for creating an mlpackage.
179179
#
180-
# These are also used by external/xnnpack.cmake. fp16 depends on psimd
180+
# fp16 depends on psimd
181181
FetchContent_Declare(psimd URL ${DEP_URL_psimd} URL_HASH SHA1=${DEP_SHA1_psimd})
182182
onnxruntime_fetchcontent_makeavailable(psimd)
183183
set(PSIMD_SOURCE_DIR ${psimd_SOURCE_DIR})

cmake/onnxruntime_unittests.cmake

-8
Original file line numberDiff line numberDiff line change
@@ -221,19 +221,11 @@ function(AddTest)
221221
)
222222
else()
223223
set(TEST_NODE_FLAGS)
224-
if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
225-
list(APPEND TEST_NODE_FLAGS "--experimental-wasm-threads")
226-
endif()
227-
if (onnxruntime_ENABLE_WEBASSEMBLY_SIMD)
228-
list(APPEND TEST_NODE_FLAGS "--experimental-wasm-simd")
229-
endif()
230224

231225
# prefer Node from emsdk so the version is more deterministic
232226
if (DEFINED ENV{EMSDK_NODE})
233227
set(NODE_EXECUTABLE $ENV{EMSDK_NODE})
234228
else()
235-
# warning as we don't know what node version is being used and whether things like the TEST_NODE_FLAGS
236-
# will be valid. e.g. "--experimental-wasm-simd" is not valid with node v20 or later.
237229
message(WARNING "EMSDK_NODE environment variable was not set. Falling back to system `node`.")
238230
set(NODE_EXECUTABLE node)
239231
endif()

cmake/onnxruntime_webassembly.cmake

+7-2
Original file line numberDiff line numberDiff line change
@@ -380,10 +380,15 @@ jsepDownload:_pp_")
380380
"SHELL:--pre-js \"${ONNXRUNTIME_ROOT}/wasm/pre-jsep.js\""
381381
"SHELL:-s ASYNCIFY=1"
382382
"SHELL:-s ASYNCIFY_STACK_SIZE=65536"
383-
"SHELL:-s ASYNCIFY_EXPORTS=['OrtRun']"
384-
"SHELL:-s ASYNCIFY_IMPORTS=['Module.jsepCopy','Module.jsepCopyAsync','jsepDownload']"
385383
)
386384
set_target_properties(onnxruntime_webassembly PROPERTIES LINK_DEPENDS ${ONNXRUNTIME_ROOT}/wasm/pre-jsep.js)
385+
386+
if (onnxruntime_ENABLE_WEBASSEMBLY_MEMORY64)
387+
target_link_options(onnxruntime_webassembly PRIVATE
388+
"SHELL:-s ASYNCIFY_EXPORTS=['OrtRun']"
389+
"SHELL:-s ASYNCIFY_IMPORTS=['Module.jsepCopy','Module.jsepCopyAsync','jsepDownload']"
390+
)
391+
endif()
387392
endif()
388393

389394
if (onnxruntime_EMSCRIPTEN_SETTINGS)

cmake/patches/xnnpack/AddEmscriptenAndIosSupport.patch

+12-12
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
diff --git a/CMakeLists.txt b/CMakeLists.txt
2-
index 1ff85b538..c3ef2183f 100644
2+
index f0b3410ae..1e3cb8178 100644
33
--- a/CMakeLists.txt
44
+++ b/CMakeLists.txt
5-
@@ -253,7 +253,7 @@ ENDIF()
5+
@@ -337,7 +337,7 @@ ENDIF()
66
# ---[ Build flags
77
IF(NOT CMAKE_SYSTEM_NAME)
88
MESSAGE(FATAL_ERROR "CMAKE_SYSTEM_NAME not defined")
@@ -11,21 +11,21 @@ index 1ff85b538..c3ef2183f 100644
1111
MESSAGE(FATAL_ERROR "Unrecognized CMAKE_SYSTEM_NAME value \"${CMAKE_SYSTEM_NAME}\"")
1212
ENDIF()
1313
IF(CMAKE_SYSTEM_NAME MATCHES "Windows")
14-
@@ -763,7 +763,12 @@ IF(XNNPACK_BUILD_LIBRARY)
15-
TARGET_LINK_LIBRARIES(operator-run PRIVATE xnnpack-base logging)
14+
@@ -848,7 +848,12 @@ IF(XNNPACK_BUILD_LIBRARY)
1615
TARGET_LINK_LIBRARIES(operator-utils PRIVATE xnnpack-base logging)
17-
TARGET_LINK_LIBRARIES(subgraph PRIVATE xnnpack-base allocator logging memory mutex operators operator-run)
18-
- TARGET_LINK_LIBRARIES(XNNPACK PRIVATE allocator cache hardware-config indirection logging memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing microkernels-prod subgraph)
16+
TARGET_LINK_LIBRARIES(reference-ukernels PRIVATE xnnpack-base)
17+
TARGET_LINK_LIBRARIES(subgraph PRIVATE xnnpack-base allocator logging memory mutex operators operator-run datatype)
18+
- TARGET_LINK_LIBRARIES(XNNPACK PRIVATE xnnpack-base allocator cache hardware-config indirection memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing microkernels-prod subgraph datatype reference-ukernels)
1919
+ IF(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
20-
+ # omit microkernels-prod as the list is manually created by ORT in cmake/external/xnnpack.cmake
21-
+ TARGET_LINK_LIBRARIES(XNNPACK PRIVATE allocator cache hardware-config indirection logging memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing subgraph)
20+
+ # omit microkernels-prod as the list is manually created by ORT in cmake/external/xnnpack.cmake
21+
+ TARGET_LINK_LIBRARIES(XNNPACK PRIVATE xnnpack-base allocator cache hardware-config indirection memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing subgraph datatype reference-ukernels)
2222
+ ELSE()
23-
+ TARGET_LINK_LIBRARIES(XNNPACK PRIVATE allocator cache hardware-config indirection logging memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing microkernels-prod subgraph)
24-
+ ENDIF()
25-
TARGET_LINK_LIBRARIES(XNNPACK PUBLIC xnnpack-base)
23+
+ TARGET_LINK_LIBRARIES(XNNPACK PRIVATE xnnpack-base allocator cache hardware-config indirection memory microkernel-utils microparams-init mutex normalization operators operator-run operator-utils packing microkernels-prod subgraph datatype reference-ukernels)
24+
+ ENDIF()
25+
TARGET_LINK_LIBRARIES(XNNPACK PUBLIC pthreadpool logging)
2626
SET_TARGET_PROPERTIES(XNNPACK PROPERTIES C_EXTENSIONS YES)
2727
ENDIF()
28-
@@ -772,7 +777,8 @@ IF(NOT MSVC)
28+
@@ -857,7 +862,8 @@ IF(NOT MSVC)
2929
ENDIF()
3030
IF(XNNPACK_TARGET_PROCESSOR STREQUAL "arm")
3131
SET_PROPERTY(SOURCE ${ALL_MICROKERNEL_SRCS} APPEND_STRING PROPERTY COMPILE_FLAGS " -marm ")

dockerfiles/Dockerfile.cuda

+3-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@ ARG OS=ubuntu24.04
1212
FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-devel-${OS}
1313
ARG CUDA_VERSION
1414
ARG CUDNN_VERSION
15-
ARG CMAKE_CUDA_ARCHITECTURES="61;70;75;80;86;90"
15+
# Adjust as needed
16+
# Check your CUDA arch: https://developer.nvidia.com/cuda-gpus
17+
ARG CMAKE_CUDA_ARCHITECTURES="75;80;90"
1618

1719
ENV DEBIAN_FRONTEND=noninteractive
1820

dockerfiles/Dockerfile.tensorrt

+3-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@ FROM nvcr.io/nvidia/tensorrt:${TRT_CONTAINER_VERSION}-py3
1010

1111
ARG ONNXRUNTIME_REPO=https://github.com/Microsoft/onnxruntime
1212
ARG ONNXRUNTIME_BRANCH=main
13-
ARG CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80
13+
# Adjust as needed
14+
# Check your CUDA arch: https://developer.nvidia.com/cuda-gpus
15+
ARG CMAKE_CUDA_ARCHITECTURES=75;80;90
1416

1517
RUN apt-get update &&\
1618
apt-get install -y sudo git bash unattended-upgrades

docs/python/examples/plot_train_convert_predict.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -212,9 +212,9 @@ def sess_predict_proba_rf(x):
212212
rf.fit(X_train, y_train)
213213
initial_type = [("float_input", FloatTensorType([1, 4]))]
214214
onx = convert_sklearn(rf, initial_types=initial_type)
215-
with open("rf_iris_%d.onnx" % n_trees, "wb") as f:
215+
with open(f"rf_iris_{n_trees}.onnx", "wb") as f:
216216
f.write(onx.SerializeToString())
217-
sess = rt.InferenceSession("rf_iris_%d.onnx" % n_trees, providers=rt.get_available_providers())
217+
sess = rt.InferenceSession(f"rf_iris_{n_trees}.onnx", providers=rt.get_available_providers())
218218

219219
def sess_predict_proba_loop(x):
220220
return sess.run([prob_name], {input_name: x.astype(numpy.float32)})[0] # noqa: B023

include/onnxruntime/core/framework/allocator.h

+5
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ constexpr const char* OpenVINO_CPU = "OpenVINO_CPU";
5252
constexpr const char* OpenVINO_GPU = "OpenVINO_GPU";
5353
constexpr const char* OpenVINO_RT = "OpenVINO_RT";
5454
constexpr const char* OpenVINO_RT_NPU = "OpenVINO_RT_NPU";
55+
constexpr const char* QNN_HTP_SHARED = "QnnHtpShared";
5556
constexpr const char* WEBGPU_BUFFER = "WebGPU_Buffer";
5657
constexpr const char* WEBNN_TENSOR = "WebNN_Tensor";
5758

@@ -81,6 +82,10 @@ class IAllocator {
8182
*/
8283
virtual void* Alloc(size_t size) = 0;
8384

85+
/**
86+
* Free memory at p.
87+
* If p is nullptr, do nothing.
88+
*/
8489
virtual void Free(void* p) = 0;
8590

8691
// Reserve() is an interface exposed for an implementation of IAllocator

include/onnxruntime/core/framework/float16.h

+4-4
Original file line numberDiff line numberDiff line change
@@ -261,19 +261,19 @@ struct BFloat16 : onnxruntime_float16::BFloat16Impl<BFloat16> {
261261
// initializers with MLFloat16 and BFloat16 from unsigned short
262262
// E.g. 10_f16 or 10_b16
263263
#if !defined(__CUDACC__) && !defined(__HIPCC__)
264-
inline MLFloat16 operator"" _f16(unsigned long long int v) noexcept {
264+
inline MLFloat16 operator""_f16(unsigned long long int v) noexcept {
265265
return MLFloat16::FromBits(narrow<uint16_t>(v));
266266
}
267267

268-
inline MLFloat16 operator"" _fp16(long double v) noexcept {
268+
inline MLFloat16 operator""_fp16(long double v) noexcept {
269269
return MLFloat16(static_cast<float>(v));
270270
}
271271

272-
inline BFloat16 operator"" _b16(unsigned long long int v) noexcept {
272+
inline BFloat16 operator""_b16(unsigned long long int v) noexcept {
273273
return BFloat16::FromBits((narrow<uint16_t>(v)));
274274
}
275275

276-
inline BFloat16 operator"" _bfp16(long double v) noexcept {
276+
inline BFloat16 operator""_bfp16(long double v) noexcept {
277277
return BFloat16(static_cast<float>(v));
278278
}
279279
#endif

include/onnxruntime/core/framework/float8.h

+8-8
Original file line numberDiff line numberDiff line change
@@ -165,11 +165,11 @@ inline ORT_HOST_DEVICE bool operator<(const Float8E4M3FN& left, const Float8E4M3
165165
// initializers with MLFloat8E4M3FN and Float8E4M3FN from unsigned char
166166
#if !defined(__CUDACC__) && !defined(__HIPCC__)
167167

168-
inline Float8E4M3FN operator"" _f8e4m3fn(unsigned long long int v) {
168+
inline Float8E4M3FN operator""_f8e4m3fn(unsigned long long int v) {
169169
return Float8E4M3FN(narrow<uint8_t>(v), Float8E4M3FN::FromBits());
170170
}
171171

172-
inline Float8E4M3FN operator"" _f8e4m3fnp8(long double v) {
172+
inline Float8E4M3FN operator""_f8e4m3fnp8(long double v) {
173173
return Float8E4M3FN(static_cast<float>(v), true);
174174
}
175175

@@ -323,11 +323,11 @@ inline ORT_HOST_DEVICE bool operator<(const Float8E4M3FNUZ& left, const Float8E4
323323
// initializers with Float8E4M3FNUZ from unsigned char
324324
#if !defined(__CUDACC__) && !defined(__HIPCC__)
325325

326-
inline Float8E4M3FNUZ operator"" _f8e4m3p8fnuz(unsigned long long int v) {
326+
inline Float8E4M3FNUZ operator""_f8e4m3p8fnuz(unsigned long long int v) {
327327
return Float8E4M3FNUZ(narrow<uint8_t>(v), Float8E4M3FNUZ::FromBits());
328328
}
329329

330-
inline Float8E4M3FNUZ operator"" _f8e4m3fnuzp8(long double v) {
330+
inline Float8E4M3FNUZ operator""_f8e4m3fnuzp8(long double v) {
331331
return Float8E4M3FNUZ(static_cast<float>(v), true);
332332
}
333333

@@ -493,11 +493,11 @@ inline ORT_HOST_DEVICE bool operator<(const Float8E5M2& left, const Float8E5M2&
493493
// initializers with MLFloat8E5M2 and Float8E5M2 from unsigned char
494494
#if !defined(__CUDACC__) && !defined(__HIPCC__)
495495

496-
inline Float8E5M2 operator"" _f8e5m2fn(unsigned long long int v) {
496+
inline Float8E5M2 operator""_f8e5m2fn(unsigned long long int v) {
497497
return Float8E5M2(narrow<uint8_t>(v), Float8E5M2::FromBits());
498498
}
499499

500-
inline Float8E5M2 operator"" _f8e5m2fnp8(long double v) {
500+
inline Float8E5M2 operator""_f8e5m2fnp8(long double v) {
501501
return Float8E5M2(static_cast<float>(v), true);
502502
}
503503

@@ -642,11 +642,11 @@ inline ORT_HOST_DEVICE bool operator<(const Float8E5M2FNUZ& left, const Float8E5
642642
// initializers with Float8E5M2FNUZ from unsigned char
643643
#if !defined(__CUDACC__) && !defined(__HIPCC__)
644644

645-
inline Float8E5M2FNUZ operator"" _f8e5m2fnuz(unsigned long long int v) {
645+
inline Float8E5M2FNUZ operator""_f8e5m2fnuz(unsigned long long int v) {
646646
return Float8E5M2FNUZ(narrow<uint8_t>(v), Float8E5M2FNUZ::FromBits());
647647
}
648648

649-
inline Float8E5M2FNUZ operator"" _f8e5m2fnuzp8(long double v) {
649+
inline Float8E5M2FNUZ operator""_f8e5m2fnuzp8(long double v) {
650650
return Float8E5M2FNUZ(static_cast<float>(v), true);
651651
}
652652

include/onnxruntime/core/framework/ortdevice.h

+1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ struct OrtDevice {
2525
static const MemoryType CUDA_PINNED = 1;
2626
static const MemoryType HIP_PINNED = 2;
2727
static const MemoryType CANN_PINNED = 3;
28+
static const MemoryType QNN_HTP_SHARED = 4;
2829
};
2930

3031
constexpr OrtDevice(DeviceType device_type_, MemoryType memory_type_, DeviceId device_id_)

include/onnxruntime/core/framework/ortmemoryinfo.h

+2
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
#include <string_view>
77

88
#include "core/common/hash_combine.h"
9+
#include "core/framework/ortdevice.h"
10+
#include "core/session/onnxruntime_c_api.h" // for OrtMemType, OrtAllocatorType
911

1012
struct OrtMemoryInfo {
1113
OrtMemoryInfo() = default; // to allow default construction of Tensor

include/onnxruntime/core/session/onnxruntime_c_api.h

+4
Original file line numberDiff line numberDiff line change
@@ -3670,6 +3670,10 @@ struct OrtApi {
36703670
* "enable_htp_spill_fill_buffer": Enable HTP spill fill buffer setting. The flag is used while generating context binary.
36713671
* - "0": Default. Disabled.
36723672
* - "1": Enabled.
3673+
* "enable_htp_shared_memory_allocator": Enable the QNN HTP shared memory allocator. Requires libcdsprpc.so/dll to
3674+
* be available.
3675+
* - "0": Default. Disabled.
3676+
* - "1": Enabled.
36733677
*
36743678
* SNPE supported keys:
36753679
* "runtime": SNPE runtime engine, options: "CPU", "CPU_FLOAT32", "GPU", "GPU_FLOAT32_16_HYBRID", "GPU_FLOAT16",

0 commit comments

Comments
 (0)