Skip to content

Commit 8bf94b3

Browse files
Merge branch 'main' into adrianl/BaseTester_SupportCompileAndKernelPluginEps
2 parents a4f505f + a3749f1 commit 8bf94b3

File tree

95 files changed

+4186
-883
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

95 files changed

+4186
-883
lines changed

.vscode/settings.json

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,23 @@
1414
"-build/include_subdir",
1515
"-runtime/references"
1616
],
17-
"C_Cpp.autoAddFileAssociations": false
17+
"C_Cpp.autoAddFileAssociations": false,
18+
19+
// Exclude build directories and non-essential folders from C++ parsing
20+
"C_Cpp.files.exclude": {
21+
"**/build/**": true,
22+
"**/build_*/**": true,
23+
"**/cmake/external/**": true,
24+
"**/node_modules/**": true,
25+
"**/.git/**": true
26+
},
27+
28+
// Exclude from search but keep in explorer
29+
"search.exclude": {
30+
"**/build/**": true,
31+
"**/build_*/**": true,
32+
"**/cmake/external/**": true,
33+
"**/node_modules/**": true,
34+
"**/.git/**": true
35+
}
1836
}

cmake/CMakeLists.txt

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1441,7 +1441,7 @@ get_property(onnxruntime_GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_
14411441
if (onnxruntime_USE_CUDA)
14421442
set(CMAKE_CUDA_STANDARD 17)
14431443
if(onnxruntime_CUDA_HOME)
1444-
file(TO_CMAKE_PATH CUDAToolkit_ROOT ${onnxruntime_CUDA_HOME})
1444+
file(TO_CMAKE_PATH ${onnxruntime_CUDA_HOME} CUDAToolkit_ROOT)
14451445
endif()
14461446
find_package(CUDAToolkit REQUIRED)
14471447

@@ -1801,8 +1801,11 @@ if (onnxruntime_ENABLE_EXTERNAL_CUSTOM_OP_SCHEMAS)
18011801
)
18021802
endif()
18031803

1804-
if(NOT onnxruntime_BUILD_SHARED_LIB AND onnxruntime_USE_WEBGPU)
1805-
message(WARNING "CMake target files will not be generated for static onnxruntime builds with webgpu support")
1804+
if (NOT onnxruntime_BUILD_SHARED_LIB AND
1805+
(onnxruntime_USE_WEBGPU OR (CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND onnxruntime_USE_XNNPACK)))
1806+
message(WARNING
1807+
"CMake target files will not be generated for static onnxruntime builds "
1808+
"with WebGPU or Emscripten+XNNPACK support")
18061809
else()
18071810
# Install
18081811
include(CMakePackageConfigHelpers)

cmake/external/abseil-cpp.cmake

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,8 @@ set(ABSL_USE_EXTERNAL_GOOGLETEST ON)
1212

1313
# Both abseil and xnnpack create a target called memory, which
1414
# results in a duplicate target if ABSL_ENABLE_INSTALL is on.
15-
if (onnxruntime_USE_XNNPACK)
16-
set(ABSL_ENABLE_INSTALL OFF)
17-
else()
18-
if (NOT CMAKE_SYSTEM_NAME MATCHES "AIX")
15+
if (NOT CMAKE_SYSTEM_NAME MATCHES "AIX")
1916
set(ABSL_ENABLE_INSTALL ON)
20-
endif()
2117
endif()
2218

2319
if(Patch_FOUND)

cmake/external/cuda_configuration.cmake

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,11 @@ macro(setup_cuda_architectures)
8585
# * Always use accelerated (`-a` suffix) target for supported real architectures.
8686
# cmake-format: on
8787

88+
# Allow override via CUDAARCHS environment variable (standard CMake variable)
89+
if(NOT CMAKE_CUDA_ARCHITECTURES AND DEFINED ENV{CUDAARCHS})
90+
set(CMAKE_CUDA_ARCHITECTURES "$ENV{CUDAARCHS}")
91+
endif()
92+
8893
if(CMAKE_CUDA_ARCHITECTURES STREQUAL "native")
8994
# Detect highest available compute capability
9095
set(OUTPUTFILE ${PROJECT_BINARY_DIR}/detect_cuda_arch)
@@ -142,12 +147,12 @@ macro(setup_cuda_architectures)
142147
continue()
143148
endif()
144149

145-
if(CUDA_ARCH MATCHES "^([1-9])([0-9])+a?-virtual$")
150+
if(CUDA_ARCH MATCHES "^([1-9])([0-9])+[af]?-virtual$")
146151
set(CMAKE_CUDA_ARCHITECTURES_LAST_VIRTUAL ${CUDA_ARCH})
147-
elseif(CUDA_ARCH MATCHES "^(([1-9])([0-9])+)a?-real$")
148-
list(APPEND CMAKE_CUDA_ARCHITECTURES_CLEAN ${CMAKE_MATCH_1})
149-
elseif(CUDA_ARCH MATCHES "^(([1-9])([0-9])+)a?$")
152+
elseif(CUDA_ARCH MATCHES "^(([1-9])([0-9])+)[af]?-real$")
150153
list(APPEND CMAKE_CUDA_ARCHITECTURES_CLEAN ${CMAKE_MATCH_1})
154+
elseif(CUDA_ARCH MATCHES "^(([1-9])([0-9])+)([af]?)$")
155+
list(APPEND CMAKE_CUDA_ARCHITECTURES_CLEAN ${CMAKE_MATCH_1}${CMAKE_MATCH_4})
151156
else()
152157
message(FATAL_ERROR "Unrecognized CUDA architecture: ${CUDA_ARCH}")
153158
endif()
@@ -159,7 +164,7 @@ macro(setup_cuda_architectures)
159164
set(CMAKE_CUDA_ARCHITECTURES_ORIG "${CMAKE_CUDA_ARCHITECTURES}")
160165
message(STATUS "GPU architectures: ${CMAKE_CUDA_ARCHITECTURES_ORIG}")
161166

162-
set(ARCHITECTURES_WITH_KERNELS "80" "86" "89" "90" "100" "120")
167+
set(ARCHITECTURES_WITH_KERNELS "80" "86" "89" "90" "100" "110" "120")
163168
foreach(CUDA_ARCH IN LISTS ARCHITECTURES_WITH_KERNELS)
164169
if(NOT "${CUDA_ARCH}" IN_LIST CMAKE_CUDA_ARCHITECTURES_ORIG)
165170
add_definitions("-DEXCLUDE_SM_${CUDA_ARCH}")
@@ -168,10 +173,13 @@ macro(setup_cuda_architectures)
168173
endforeach()
169174

170175
# Enable accelerated features (like WGMMA, TMA and setmaxnreg) for SM >= 90.
171-
set(ARCHITECTURES_WITH_ACCEL "90" "100" "101" "120")
176+
set(ARCHITECTURES_WITH_ACCEL "90" "100" "101" "110" "120")
172177
unset(CMAKE_CUDA_ARCHITECTURES_NORMALIZED)
173178
foreach(CUDA_ARCH IN LISTS CMAKE_CUDA_ARCHITECTURES)
174-
if("${CUDA_ARCH}" IN_LIST ARCHITECTURES_WITH_ACCEL)
179+
if(CUDA_ARCH MATCHES "^([0-9]+)f$")
180+
# Family code, no -real suffix
181+
list(APPEND CMAKE_CUDA_ARCHITECTURES_NORMALIZED "${CUDA_ARCH}")
182+
elseif("${CUDA_ARCH}" IN_LIST ARCHITECTURES_WITH_ACCEL)
175183
list(APPEND CMAKE_CUDA_ARCHITECTURES_NORMALIZED "${CUDA_ARCH}a-real")
176184
else()
177185
list(APPEND CMAKE_CUDA_ARCHITECTURES_NORMALIZED "${CUDA_ARCH}-real")

cmake/external/onnxruntime_external_deps.cmake

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -764,7 +764,11 @@ if (onnxruntime_USE_WEBGPU)
764764
# - (private) Fix compatibility issues with Safari. Contains the following changes:
765765
# - Polyfill for `device.AdapterInfo` (returns `undefined` in Safari v26.0)
766766
#
767-
${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/dawn/safari_polyfill.patch)
767+
${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/dawn/safari_polyfill.patch &&
768+
769+
# Remove the test folder to speed up potential file scan operations (70k+ files not needed for build).
770+
# Using <SOURCE_DIR> token ensures the correct absolute path regardless of working directory.
771+
${CMAKE_COMMAND} -E rm -rf <SOURCE_DIR>/test)
768772

769773
onnxruntime_fetchcontent_declare(
770774
dawn

cmake/external/xnnpack.cmake

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ ELSEIF(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
6262
SET(ORT_TARGET_PROCESSOR "arm64")
6363
ELSEIF(CMAKE_SYSTEM_PROCESSOR STREQUAL "ppc64le")
6464
SET(ORT_TARGET_PROCESSOR "ppc64")
65+
ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
66+
SET(ORT_TARGET_PROCESSOR "wasm")
6567
ELSEIF(NOT ORT_TARGET_PROCESSOR MATCHES "^(x86(_64)?|arm64|riscv(32|64|128)|Hexagon|ppc64)$")
6668
SET(ORT_TARGET_PROCESSOR "${CMAKE_SYSTEM_PROCESSOR}")
6769
ELSE()
@@ -90,18 +92,21 @@ onnxruntime_fetchcontent_makeavailable(googlexnnpack)
9092
set(XNNPACK_DIR ${googlexnnpack_SOURCE_DIR})
9193
set(XNNPACK_INCLUDE_DIR ${XNNPACK_DIR}/include)
9294

93-
set(onnxruntime_EXTERNAL_LIBRARIES_XNNPACK XNNPACK xnnpack-microkernels-prod pthreadpool)
95+
set(onnxruntime_EXTERNAL_LIBRARIES_XNNPACK XNNPACK pthreadpool)
9496
if(ORT_TARGET_PROCESSOR MATCHES "^arm64.*" AND NOT CMAKE_C_COMPILER_ID STREQUAL "MSVC")
9597
list(APPEND onnxruntime_EXTERNAL_LIBRARIES_XNNPACK kleidiai)
9698
endif()
99+
if(NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
100+
list(APPEND onnxruntime_EXTERNAL_LIBRARIES_XNNPACK xnnpack-microkernels-prod)
101+
endif()
97102

98103
# the XNNPACK CMake setup doesn't include the WASM kernels so we have to manually set those up
99104
if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
100105
# See source lists in _deps/googlexnnpack-src/BUILD.bazel for wasm_prod_microkernels
101106
message("Adding WebAssembly Source Files to XNNPACK")
102107
set(wasm_srcs "")
103108

104-
file(READ "${XNNPACK_DIR}/BUILD.bazel" xnnpack_bazel_config)
109+
file(READ "${XNNPACK_DIR}/build_srcs.bzl" xnnpack_bazel_config)
105110

106111
# Replace newlines with semicolon so that it is treated as a list by CMake
107112
# Also replace '[' and ']' so the bazel source lists don't get parsed as a nested list by cmake
@@ -139,19 +144,26 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
139144
GetSrcListFromBazel("TABLE_SRCS" table_srcs)
140145
list(APPEND wasm_srcs ${operator_srcs} ${table_srcs})
141146

142-
# kernels
143-
list(APPEND wasm_srcs ${XNNPACK_DIR}/src/amalgam/gen/scalar.c)
144-
list(APPEND wasm_srcs ${XNNPACK_DIR}/src/amalgam/gen/wasm.c)
147+
set(microkernel_src "")
148+
149+
include(${XNNPACK_DIR}/cmake/gen/scalar_microkernels.cmake)
150+
list(APPEND microkernel_src ${PROD_SCALAR_MICROKERNEL_SRCS})
151+
list(APPEND microkernel_src ${PROD_WASM_MICROKERNEL_SRCS})
145152

146153
if(onnxruntime_ENABLE_WEBASSEMBLY_RELAXED_SIMD)
147-
list(APPEND wasm_srcs ${XNNPACK_DIR}/src/amalgam/gen/wasmsimd.c)
148-
list(APPEND wasm_srcs ${XNNPACK_DIR}/src/amalgam/gen/wasmrelaxedsimd.c)
154+
include(${XNNPACK_DIR}/cmake/gen/wasmsimd_microkernels.cmake)
155+
include(${XNNPACK_DIR}/cmake/gen/wasmrelaxedsimd_microkernels.cmake)
156+
list(APPEND microkernel_src ${PROD_WASMSIMD_MICROKERNEL_SRCS})
157+
list(APPEND microkernel_src ${PROD_WASMRELAXEDSIMD_MICROKERNEL_SRCS})
149158
target_compile_options(XNNPACK PRIVATE "-msimd128")
150159
target_compile_options(XNNPACK PRIVATE "-mrelaxed-simd")
151160
elseif(onnxruntime_ENABLE_WEBASSEMBLY_SIMD)
152-
list(APPEND wasm_srcs ${XNNPACK_DIR}/src/amalgam/gen/wasmsimd.c)
161+
include(${XNNPACK_DIR}/cmake/gen/wasmsimd_microkernels.cmake)
162+
list(APPEND microkernel_src ${PROD_WASMSIMD_MICROKERNEL_SRCS})
153163
target_compile_options(XNNPACK PRIVATE "-msimd128")
154164
endif()
165+
list(TRANSFORM microkernel_src PREPEND "${XNNPACK_DIR}/")
166+
list(APPEND wasm_srcs ${microkernel_src})
155167

156168
message(DEBUG "wasm_srcs: ${wasm_srcs}\n")
157169
target_sources(XNNPACK PRIVATE ${wasm_srcs})

cmake/onnxruntime_providers_nv.cmake

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
# Copyright (c) Microsoft Corporation. All rights reserved.
22
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
33
# Licensed under the MIT License.
4-
find_package(CUDAToolkit REQUIRED 12.8)
4+
if(onnxruntime_CUDA_HOME)
5+
file(TO_CMAKE_PATH ${onnxruntime_CUDA_HOME} CUDAToolkit_ROOT)
6+
endif()
7+
find_package(CUDAToolkit REQUIRED)
58
enable_language(CUDA)
69
if(onnxruntime_DISABLE_CONTRIB_OPS)
710
message( FATAL_ERROR "To compile TensorRT execution provider contrib ops have to be enabled to dump an engine using com.microsoft:EPContext node." )
@@ -146,9 +149,9 @@ endif ()
146149
target_link_libraries(onnxruntime_providers_nv_tensorrt_rtx PRIVATE Eigen3::Eigen onnx flatbuffers::flatbuffers Boost::mp11 safeint_interface Eigen3::Eigen)
147150
add_dependencies(onnxruntime_providers_nv_tensorrt_rtx onnxruntime_providers_shared ${onnxruntime_EXTERNAL_DEPENDENCIES})
148151
if (onnxruntime_USE_TENSORRT_BUILTIN_PARSER)
149-
target_link_libraries(onnxruntime_providers_nv_tensorrt_rtx PRIVATE ${trt_link_libs} ${ONNXRUNTIME_PROVIDERS_SHARED} ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface ${ABSEIL_LIBS} PUBLIC CUDA::cudart)
152+
target_link_libraries(onnxruntime_providers_nv_tensorrt_rtx PRIVATE ${trt_link_libs} ${ONNXRUNTIME_PROVIDERS_SHARED} ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface ${ABSEIL_LIBS} PUBLIC CUDA::cudart CUDA::cuda_driver)
150153
else()
151-
target_link_libraries(onnxruntime_providers_nv_tensorrt_rtx PRIVATE ${onnxparser_link_libs} ${trt_link_libs} ${ONNXRUNTIME_PROVIDERS_SHARED} ${PROTOBUF_LIB} flatbuffers::flatbuffers ${ABSEIL_LIBS} PUBLIC CUDA::cudart)
154+
target_link_libraries(onnxruntime_providers_nv_tensorrt_rtx PRIVATE ${onnxparser_link_libs} ${trt_link_libs} ${ONNXRUNTIME_PROVIDERS_SHARED} ${PROTOBUF_LIB} flatbuffers::flatbuffers ${ABSEIL_LIBS} PUBLIC CUDA::cudart CUDA::cuda_driver)
152155
endif()
153156
target_include_directories(onnxruntime_providers_nv_tensorrt_rtx PRIVATE ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR} ${TENSORRT_RTX_INCLUDE_DIR} ${onnx_tensorrt_SOURCE_DIR}
154157
PUBLIC ${CUDAToolkit_INCLUDE_DIRS})

docs/Versioning.md

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,67 @@ The version number of the current stable release can be found
1111
## Release cadence
1212
See [Release Management](ReleaseManagement.md)
1313

14+
## Updating the Version for a Release
15+
16+
When preparing a release, follow these steps to update the version number across the codebase. This applies both when creating an initial release branch (updating `main`) and when preparing patch releases on release branches:
17+
18+
### Prerequisites
19+
- Node.js (check [js/.nvmrc](../js/.nvmrc) for the required version)
20+
- npm (comes with Node.js)
21+
- Python 3
22+
23+
Verify your setup:
24+
```bash
25+
node --version # Should match the version in js/.nvmrc
26+
npm --version # Should be v8.0 or newer
27+
```
28+
29+
### Steps
30+
31+
1. **Update the VERSION_NUMBER file**
32+
33+
Edit [VERSION_NUMBER](../VERSION_NUMBER) in the repository root to reflect the new version (e.g., `1.23.3`).
34+
35+
2. **Run the version update script**
36+
37+
From the repository root, run:
38+
```bash
39+
python tools/python/update_version.py
40+
```
41+
42+
This script automatically updates version numbers in:
43+
- `docs/Versioning.md` - Adds a new row to the version table
44+
- `docs/python/README.rst` - Adds release notes entry
45+
- `onnxruntime/__init__.py` - Python package version
46+
- `js/` packages - All NPM package versions and lock files
47+
48+
3. **Update the C API static_assert (Manual Step)**
49+
50+
The script does **not** update the version check in the C API. You must manually update the `static_assert` in [onnxruntime/core/session/onnxruntime_c_api.cc](../onnxruntime/core/session/onnxruntime_c_api.cc).
51+
52+
Search for `static_assert(std::string_view(ORT_VERSION)` and update the version string:
53+
```cpp
54+
static_assert(std::string_view(ORT_VERSION) == "X.Y.Z",
55+
"ORT_Version change detected, please follow below steps to ensure OrtApi is updated properly");
56+
```
57+
58+
Replace `X.Y.Z` with your new version number. The comments following this assert explain additional steps if new APIs were added to this release.
59+
60+
4. **Review all changes**
61+
62+
Review all modified files. Verify:
63+
- Version numbers are correct in all updated files
64+
- The release notes URL format is correct (e.g., `https://github.com/Microsoft/onnxruntime/releases/tag/vX.Y.Z`)
65+
66+
5. **Commit and create PR**
67+
68+
Commit all changes and create a PR targeting `main` or a release branch as appropriate.
69+
70+
### Notes
71+
72+
- The version table in this file and the ONNX opset compatibility information on [onnxruntime.ai](https://onnxruntime.ai/docs/reference/compatibility.html#onnx-opset-support) are the canonical sources for version compatibility information.
73+
- For ONNX version/opset/IR reference numbers, see the [ONNX Versioning documentation](https://github.com/onnx/onnx/blob/main/docs/Versioning.md#released-versions).
74+
1475
# Compatibility
1576
1677
## Backwards compatibility

js/web/docs/webnn-operators.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ platforms. Check the [WebNN status](https://webmachinelearning.github.io/webnn-s
5252
| GlobalLpPool| ai.onnx(7+) | l2Pool2d | Only supports 4-D input, 'p' value is 2 |
5353
| Greater | ai.onnx(7-8, 9-12, 13+) | greater | |
5454
| GreaterOrEqual | ai.onnx(12-15, 16+) | greaterOrEqual | |
55-
| GroupQueryAttention | com.microsoft(1+) | add, cast, concat, constant, cumulativeSum, div, expand, lesser, matmul, reshape, scatterND, softmax, transpose, where | Only supports input total_sequence_length is constant and past_sequence_length of past kv equals to present_sequence_length of present kv. Does not support cos_cache and sin_cache inputs |
55+
| GroupQueryAttention | com.microsoft(1+) | add, cast, concat, constant, cumulativeSum, div, expand, lesser, matmul, reshape, scatterND, softmax, transpose, where | Only supports the case where input total_sequence_length is constant and past_sequence_length of past kv equals present_sequence_length of present kv. |
5656
| GRU | ai.onnx(7-13, 14-21, 22+) | gru | Only supports 'layout' == 0. 'clip' is not supported. The activation functions in 'activations' must be one of 'Relu', 'Tanh', 'Sigmoid'. Forward and backward activations must be the same if bidirectional. 'sequence_lens' if present should be constant with values equal to the first dimension length of input 'X' |
5757
| HardSigmoid | ai.onnx(7+) | hardSigmoid | |
5858
| HardSwish | ai.onnx(14+) | hardSwish | |

js/web/lib/wasm/jsep/webgpu/ops/conv-transpose.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ export const parseConvTransposeAttributes = (attributes: Record<string, unknown>
132132
typeof attributes.autoPad == 'undefined' ? 0 : (attributes.autoPad as number)
133133
];
134134
const dilations = attributes.dilations as [number, number];
135-
const group = attributes.group as number;
135+
const group = (attributes.group as number) ?? 1; // default to 1 per ONNX spec
136136
const kernelShape = attributes.kernelShape as [number, number];
137137
const pads = attributes.pads as [number, number, number, number];
138138
const strides = attributes.strides as [number, number];

0 commit comments

Comments
 (0)