Skip to content

Commit e0b66ca

Browse files
amarin16, jambayk, fs-eire, quic-ashigarg, Ashish Garg
authored
Round 2 of cherry-picks into rel-1.21.0 (#23899)
The second round of cherry-picks into [rel-1.21.0](https://github.com/microsoft/onnxruntime/tree/rel-1.21.0). The first one was done in #23846. - #23779 - #23856 - #23827 - #23834 - #23876 - #23892 --------- Co-authored-by: Jambay Kinley <[email protected]> Co-authored-by: Yulong Wang <[email protected]> Co-authored-by: Ashish Garg <[email protected]> Co-authored-by: Ashish Garg <[email protected]>
1 parent beb1a92 commit e0b66ca

File tree

17 files changed

+225
-39
lines changed

17 files changed

+225
-39
lines changed

ThirdPartyNotices.txt

+35
Original file line numberDiff line numberDiff line change
@@ -6045,3 +6045,38 @@ https://github.com/intel/neural-speed
60456045
terms, and open source software license terms. These separate license terms
60466046
govern your use of the third party programs as set forth in the
60476047
"THIRD-PARTY-PROGRAMS" file.
6048+
6049+
_____
6050+
6051+
dawn
6052+
6053+
https://dawn.googlesource.com/dawn
6054+
6055+
BSD 3-Clause License
6056+
6057+
Copyright 2017-2023 The Dawn & Tint Authors
6058+
6059+
Redistribution and use in source and binary forms, with or without
6060+
modification, are permitted provided that the following conditions are met:
6061+
6062+
1. Redistributions of source code must retain the above copyright notice, this
6063+
list of conditions and the following disclaimer.
6064+
6065+
2. Redistributions in binary form must reproduce the above copyright notice,
6066+
this list of conditions and the following disclaimer in the documentation
6067+
and/or other materials provided with the distribution.
6068+
6069+
3. Neither the name of the copyright holder nor the names of its
6070+
contributors may be used to endorse or promote products derived from
6071+
this software without specific prior written permission.
6072+
6073+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
6074+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
6075+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
6076+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
6077+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
6078+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
6079+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
6080+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
6081+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
6082+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

cmake/onnxruntime_framework.cmake

+1-4
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,7 @@ elseif(onnxruntime_ENABLE_TRITON)
3636
endif()
3737

3838
if (onnxruntime_MINIMAL_BUILD)
39-
set(onnxruntime_framework_src_exclude
40-
"${ONNXRUNTIME_ROOT}/core/framework/fallback_cpu_capability.h"
41-
"${ONNXRUNTIME_ROOT}/core/framework/fallback_cpu_capability.cc"
42-
)
39+
set(onnxruntime_framework_src_exclude)
4340

4441
# custom ops support must be explicitly enabled in a minimal build. exclude if not.
4542
if (NOT onnxruntime_MINIMAL_BUILD_CUSTOM_OPS)

cmake/onnxruntime_providers_js.cmake

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
# Copyright (c) Microsoft Corporation. All rights reserved.
22
# Licensed under the MIT License.
33

4+
if (onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD)
5+
message(FATAL_ERROR "JSEP can not be used in a basic minimal build. Please build with '--minimal_build extended'")
6+
endif()
7+
48
add_compile_definitions(USE_JSEP=1)
59

610
file(GLOB_RECURSE onnxruntime_providers_js_cc_srcs
@@ -18,4 +22,4 @@
1822
onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers Boost::mp11 Eigen3::Eigen
1923
)
2024

21-
add_dependencies(onnxruntime_providers_js ${onnxruntime_EXTERNAL_DEPENDENCIES})
25+
add_dependencies(onnxruntime_providers_js ${onnxruntime_EXTERNAL_DEPENDENCIES})

js/common/lib/tensor-impl-type-mapping.ts

+3-6
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,6 @@ export const NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP = new Map<SupportedTypedArray
4444
[Uint32Array, 'uint32'],
4545
]);
4646

47-
// a dummy type declaration for Float16Array in case any polyfill is available.
48-
declare global {
49-
// eslint-disable-next-line @typescript-eslint/naming-convention, @typescript-eslint/no-explicit-any
50-
const Float16Array: any;
51-
}
52-
5347
// the following code allows delaying execution of BigInt/Float16Array checking. This allows lazy initialization for
5448
// NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP and NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP, which allows BigInt/Float16Array
5549
// polyfill if available.
@@ -59,6 +53,9 @@ export const checkTypedArray = () => {
5953
isTypedArrayChecked = true;
6054
const isBigInt64ArrayAvailable = typeof BigInt64Array !== 'undefined' && BigInt64Array.from;
6155
const isBigUint64ArrayAvailable = typeof BigUint64Array !== 'undefined' && BigUint64Array.from;
56+
57+
// eslint-disable-next-line @typescript-eslint/naming-convention, @typescript-eslint/no-explicit-any
58+
const Float16Array = (globalThis as any).Float16Array;
6259
const isFloat16ArrayAvailable = typeof Float16Array !== 'undefined' && Float16Array.from;
6360

6461
if (isBigInt64ArrayAvailable) {

js/common/lib/tensor-impl.ts

+7
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,13 @@ export class Tensor implements TensorInterface {
261261
} else {
262262
throw new TypeError(`A Uint8ClampedArray tensor's data must be type of uint8`);
263263
}
264+
} else if (arg0 === 'float16' && arg1 instanceof Uint16Array && typedArrayConstructor !== Uint16Array) {
265+
// when Float16Array is available and data is of type Uint16Array.
266+
// We allow Uint16Array to be passed in as data for 'float16' tensor until Float16Array is generally
267+
// supported in JavaScript environment.
268+
269+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
270+
data = new (globalThis as any).Float16Array(arg1.buffer, arg1.byteOffset, arg1.length);
264271
} else {
265272
throw new TypeError(`A ${type} tensor's data must be type of ${typedArrayConstructor}`);
266273
}

js/common/package.json

+2-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515
"build": "node ./build.js",
1616
"prepare": "npm run build",
1717
"pretest": "tsc --build ./test",
18-
"test": "mocha ./test/**/*.js --timeout 30000"
18+
"test": "mocha \"./test/**/*.js\" --timeout 30000",
19+
"test:f16": "mocha -n js-float16array \"./test/**/*.js\" --timeout 30000"
1920
},
2021
"devDependencies": {
2122
"typedoc": "^0.25.7"

js/common/test/unit-tests/common.ts

+3-2
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,10 @@ export const NUMBER_COMPATIBLE_NUMERICAL_TYPES = [
2929
export const BIGINT_TYPES = [['int64', BigInt64Array, true] as const, ['uint64', BigUint64Array, true] as const];
3030

3131
/**
32-
* float16 type, data represented by Uint16Array
32+
* float16 type, data represented by Uint16Array/Float16Array
3333
*/
34-
export const FLOAT16_TYPE = ['float16', Uint16Array, false] as const;
34+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
35+
export const FLOAT16_TYPE = ['float16', (globalThis as any).Float16Array ?? Uint16Array, false] as const;
3536

3637
/**
3738
* A list of all numerical types.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
// Copyright (c) Microsoft Corporation. All rights reserved.
2+
// Licensed under the MIT License.
3+
4+
import assert from 'assert/strict';
5+
import { Tensor } from 'onnxruntime-common';
6+
7+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
8+
const globalF16 = (globalThis as any).Float16Array;
9+
10+
(globalF16 ? describe : describe.skip)('Tensor Constructor Tests - check type float16 (Float16Array available)', () => {
11+
it("[float16] new Tensor('float16', numbers, dims): allow number array when Float16Array is available", () => {
12+
const tensor = new Tensor('float16', [1, 2, 3, 4], [2, 2]);
13+
assert.equal(tensor.type, 'float16', "tensor.type should be 'float16'");
14+
assert(tensor.data instanceof globalF16, "tensor.data should be an instance of 'Float16Array'");
15+
assert.equal(tensor.data[0], 1, 'tensor.data[0] should be 1');
16+
assert.equal(tensor.data[1], 2, 'tensor.data[1] should be 2');
17+
assert.equal(tensor.data[2], 3, 'tensor.data[2] should be 3');
18+
assert.equal(tensor.data[3], 4, 'tensor.data[3] should be 4');
19+
assert.equal(tensor.data.length, 4, 'tensor.data.length should be 4');
20+
});
21+
22+
it("[float16] new Tensor('float16', float16array, dims): allow Float16Array when Float16Array is available", () => {
23+
const tensor = new Tensor('float16', new globalF16([1, 2, 3, 4]), [2, 2]);
24+
assert.equal(tensor.type, 'float16', "tensor.type should be 'float16'");
25+
assert(tensor.data instanceof globalF16, "tensor.data should be an instance of 'Float16Array'");
26+
assert.equal(tensor.data[0], 1, 'tensor.data[0] should be 1');
27+
assert.equal(tensor.data[1], 2, 'tensor.data[1] should be 2');
28+
assert.equal(tensor.data[2], 3, 'tensor.data[2] should be 3');
29+
assert.equal(tensor.data[3], 4, 'tensor.data[3] should be 4');
30+
assert.equal(tensor.data.length, 4, 'tensor.data.length should be 4');
31+
});
32+
33+
it("[float16] new Tensor('float16', uint16array, dims): allow Uint16Array when Float16Array is available", () => {
34+
const tensor = new Tensor('float16', new Uint16Array([15360, 16384, 16896, 17408]), [2, 2]);
35+
assert.equal(tensor.type, 'float16', "tensor.type should be 'float16'");
36+
assert(tensor.data instanceof globalF16, "tensor.data should be an instance of 'Float16Array'");
37+
assert.equal(tensor.data[0], 1, 'tensor.data[0] should be 1');
38+
assert.equal(tensor.data[1], 2, 'tensor.data[1] should be 2');
39+
assert.equal(tensor.data[2], 3, 'tensor.data[2] should be 3');
40+
assert.equal(tensor.data[3], 4, 'tensor.data[3] should be 4');
41+
assert.equal(tensor.data.length, 4, 'tensor.data.length should be 4');
42+
});
43+
});
44+
45+
(globalF16 ? describe.skip : describe)(
46+
'Tensor Constructor Tests - check type float16 (Float16Array not available)',
47+
() => {
48+
it(
49+
"[float16] new Tensor('float16', numbers, dims): " +
50+
"expect to throw because it's not allowed to construct 'float16' tensor from number array",
51+
() => {
52+
assert.throws(() => new Tensor('float16', [1, 2, 3, 4], [2, 2]), TypeError);
53+
},
54+
);
55+
56+
it("[float16] new Tensor('float16', uint16array, dims): allow Uint16Array", () => {
57+
const tensor = new Tensor('float16', new Uint16Array([15360, 16384, 16896, 17408]), [2, 2]);
58+
assert.equal(tensor.type, 'float16', "tensor.type should be 'float16'");
59+
assert(tensor.data instanceof Uint16Array, "tensor.data should be an instance of 'Uint16Array'");
60+
});
61+
},
62+
);

js/common/test/unit-tests/tensor/constructor-type.ts

-8
Original file line numberDiff line numberDiff line change
@@ -105,14 +105,6 @@ describe('Tensor Constructor Tests - check types', () => {
105105
assert(tensor.data instanceof Uint8Array, "tensor.data should be an instance of 'Uint8Array'");
106106
});
107107

108-
it(
109-
"[float16] new Tensor('float16', numbers, dims): " +
110-
"expect to throw because it's not allowed to construct 'float16' tensor from number array",
111-
() => {
112-
assert.throws(() => new Tensor('float16', [1, 2, 3, 4], [2, 2]), TypeError);
113-
},
114-
);
115-
116108
it("[badtype] new Tensor('a', numbers, dims): expect to throw because 'a' is an invalid type", () => {
117109
assert.throws(() => new TensorAny('a', [1, 2, 3, 4], [2, 2]), TypeError);
118110
});

onnxruntime/core/framework/fallback_cpu_capability.cc

+4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
// Copyright (c) Microsoft Corporation. All rights reserved.
22
// Licensed under the MIT License.
33

4+
#if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)
5+
46
#include "core/framework/fallback_cpu_capability.h"
57
#include "core/common/inlined_containers.h"
68

@@ -176,3 +178,5 @@ std::unordered_set<NodeIndex> GetCpuPreferredNodes(const onnxruntime::GraphViewe
176178
}
177179

178180
} // namespace onnxruntime
181+
182+
#endif // !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)

onnxruntime/core/framework/fallback_cpu_capability.h

+4
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33

44
#pragma once
55

6+
#if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)
7+
68
#include <gsl/gsl>
79
#include "core/common/inlined_containers_fwd.h"
810
#include "core/framework/execution_provider.h" // for IExecutionProvider::IKernelLookup
@@ -26,3 +28,5 @@ std::unordered_set<NodeIndex> GetCpuPreferredNodes(const GraphViewer& graph,
2628
const logging::Logger& logger);
2729

2830
} // namespace onnxruntime
31+
32+
#endif // !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)

onnxruntime/core/providers/qnn/qnn_allocator.cc

+3-1
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,9 @@ void HtpSharedMemoryAllocator::Free(void* allocation_address) {
181181
// Avoid throwing exceptions as this may be running from a destructor.
182182
try {
183183
// take ownership of shared memory and free at end of scope
184-
auto shared_memory = WrapSharedMemoryWithUniquePtr(allocation_address, rpcmem_lib_->Api());
184+
const size_t allocation_offset = AllocationOffsetFromStartOfHeader();
185+
void* raw_allocation_address = (void*)((std::byte*)allocation_address - allocation_offset);
186+
auto shared_memory = WrapSharedMemoryWithUniquePtr(raw_allocation_address, rpcmem_lib_->Api());
185187

186188
// destroy header
187189
allocation_header.~AllocationHeader();

onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ def get_qnn_qdq_config(
5555
stride: int | None = None,
5656
calibration_providers: list[str] | None = None,
5757
op_types_to_quantize: list[str] | None = None,
58+
nodes_to_exclude: list[str] | None = None,
5859
) -> StaticQuantConfig:
5960
"""
6061
Returns a static quantization configuration suitable for running QDQ models on QNN EP.
@@ -122,6 +123,8 @@ def get_qnn_qdq_config(
122123
calibration_providers: Execution providers to run the session during calibration. Default is None which uses
123124
[ "CPUExecutionProvider" ].
124125
op_types_to_quantize: If set to None, all operator types will be quantized except for OP_TYPES_TO_EXCLUDE
126+
nodes_to_exclude: List of nodes names to exclude from quantization. The nodes in this list will be excluded from
127+
quantization when it is not None.
125128
126129
Returns:
127130
A StaticQuantConfig object
@@ -167,10 +170,13 @@ def get_qnn_qdq_config(
167170
)
168171

169172
op_types_to_quantize_set = set(op_types_to_quantize) if op_types_to_quantize else None
173+
nodes_to_exclude_set = set(nodes_to_exclude) if nodes_to_exclude else None
170174

171175
for node in model.graph.node:
172176
if op_types_to_quantize_set and node.op_type not in op_types_to_quantize_set:
173177
continue
178+
if nodes_to_exclude_set and node.name in nodes_to_exclude_set:
179+
continue
174180
op_types.add(node.op_type)
175181
qnn_compat.process_node(node)
176182

@@ -198,9 +204,10 @@ def get_qnn_qdq_config(
198204
calibrate_method=calibrate_method,
199205
activation_type=activation_type,
200206
weight_type=weight_type,
201-
op_types_to_quantize=op_types_to_quantize
202-
if op_types_to_quantize
203-
else list(op_types.difference(OP_TYPES_TO_EXCLUDE)),
207+
op_types_to_quantize=(
208+
op_types_to_quantize if op_types_to_quantize else list(op_types.difference(OP_TYPES_TO_EXCLUDE))
209+
),
210+
nodes_to_exclude=nodes_to_exclude,
204211
per_channel=per_channel,
205212
use_external_data_format=(model_has_external_data or model.ByteSize() >= MODEL_SIZE_THRESHOLD),
206213
calibration_providers=calibration_providers,

onnxruntime/python/tools/quantization/quantize.py

+22-10
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,8 @@ def get_qdq_config(
240240
keep_removable_activations: bool = False,
241241
min_real_range: float | None = None,
242242
tensor_quant_overrides: dict[str, list[dict[str, Any]]] | None = None,
243+
calibration_providers: list[str] | None = None,
244+
op_types_to_quantize: list[str] | None = None,
243245
nodes_to_exclude: list[str] | Callable[[onnx.ModelProto, onnx.NodeProto], bool] | None = None,
244246
extra_options: dict | None = None,
245247
) -> StaticQuantConfig:
@@ -294,6 +296,10 @@ def get_qdq_config(
294296
'convert["recv_nodes"] = Set : Set of node names that consume the converted activation,
295297
other nodes get the original type. If not specified,
296298
assume all consumer nodes get the converted type.
299+
calibration_providers: Execution providers to run the session during calibration. Default is None which uses
300+
[ "CPUExecutionProvider" ].
301+
op_types_to_quantize: List of operator types to quantize. If None, all operators other than Cast, DequantizeLinear,
302+
and QuantizeLinear are quantized.
297303
nodes_to_exclude: List of nodes names to exclude from quantization. Alternatively, can provide a function that
298304
accepts an onnx.ModelProto and onnx.NodeProto as arguments and returns true if the give onnx.NodeProto
299305
should be excluded from quantization.
@@ -324,17 +330,20 @@ def get_qdq_config(
324330
if onnx.external_data_helper.uses_external_data(initializer):
325331
model_has_external_data = True
326332

327-
final_nodes_to_exclude = []
328-
if nodes_to_exclude is not None and isinstance(nodes_to_exclude, list):
329-
final_nodes_to_exclude.extend(nodes_to_exclude)
333+
op_types_to_quantize_set = set(op_types_to_quantize) if op_types_to_quantize else None
334+
nodes_to_exclude_set = set(nodes_to_exclude) if isinstance(nodes_to_exclude, list) else set()
330335

331336
# Iterate through nodes to get all operator types in the model and
332337
# call user's function to filter out nodes from quantization.
333338
for node in model.graph.node:
334-
op_types.add(node.op_type)
335-
if nodes_to_exclude is not None and callable(nodes_to_exclude):
336-
if nodes_to_exclude(model, node):
337-
final_nodes_to_exclude.append(node.name)
339+
if op_types_to_quantize_set and node.op_type not in op_types_to_quantize_set:
340+
continue
341+
if node.name in nodes_to_exclude_set:
342+
continue
343+
if callable(nodes_to_exclude) and nodes_to_exclude(model, node):
344+
nodes_to_exclude_set.add(node.name)
345+
else:
346+
op_types.add(node.op_type)
338347

339348
final_extra_options = {
340349
"MinimumRealRange": min_real_range,
@@ -378,11 +387,14 @@ def get_qdq_config(
378387
quant_format=QuantFormat.QDQ,
379388
activation_type=activation_type,
380389
weight_type=weight_type,
381-
op_types_to_quantize=list(op_types.difference(op_types_to_exclude)),
382-
nodes_to_exclude=final_nodes_to_exclude,
390+
op_types_to_quantize=(
391+
op_types_to_quantize if op_types_to_quantize else list(op_types.difference(op_types_to_exclude))
392+
),
393+
nodes_to_exclude=list(nodes_to_exclude_set),
383394
per_channel=per_channel,
384395
reduce_range=reduce_range,
385396
use_external_data_format=(model_has_external_data or model.ByteSize() >= MODEL_SIZE_THRESHOLD),
397+
calibration_providers=calibration_providers,
386398
extra_options=final_extra_options,
387399
)
388400

@@ -442,7 +454,7 @@ def check_static_quant_arguments(quant_format: QuantFormat, activation_type: Qua
442454
if activation_type != QuantType.QFLOAT8E4M3FN and weight_type == QuantType.QFLOAT8E4M3FN:
443455
raise ValueError(
444456
f"ONNXRuntime quantization doesn't support data format: activation_type={activation_type} "
445-
f"!=QuantType.QFLOAT8E4M3FN, weight_type=QuantType.QFLOAT8E4M3FN."
457+
"!=QuantType.QFLOAT8E4M3FN, weight_type=QuantType.QFLOAT8E4M3FN."
446458
)
447459

448460
if activation_type == QuantType.QFLOAT8E4M3FN and weight_type != QuantType.QFLOAT8E4M3FN:

0 commit comments

Comments (0)