-
Notifications
You must be signed in to change notification settings - Fork 3.7k
[EP API] header-only adapter for EP API #26919
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 5 commits
c9e4973
e6c2e8e
4bfc650
c79d03d
e0e4037
f88218e
db67207
2247620
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| ## EP adapter | ||
|
|
||
| This folder contains a set of C++ header files. They exist specifically to allow ONNX Runtime's internal kernel-based EPs to use the plugin-style EP API while keeping changes to existing code minimal. | ||
|
|
||
| ### Usage | ||
|
|
||
| Make sure to include "ep/_pch.h" in every source file of the implementation. Using the compiler's precompiled header (PCH) option is recommended. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I still don't understand the name
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. we are using […]; we do have some 'pch.h' files in the winml folder, a 'test_pch.h' in the cmake folder, and a PCH for the CUDA EP: onnxruntime\core\providers\cuda\cuda_pch.h. Using a filename containing 'pch' may be a good way to indicate explicitly that the file is used as a PCH. The reason why the file is not inside |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,61 @@ | ||
| // Copyright (c) Microsoft Corporation. All rights reserved. | ||
| // Licensed under the MIT License. | ||
|
|
||
| #pragma once | ||
|
|
||
| #include "api.h" | ||
|
Check warning on line 6 in include/onnxruntime/ep/_pch.h
|
||
| #include "common.h" | ||
|
Check warning on line 7 in include/onnxruntime/ep/_pch.h
|
||
|
|
||
| // This header is only used when building WebGPU/CUDA EP as a shared library. | ||
| // | ||
| // This header file is used as a precompiled header so it is always included first. | ||
|
|
||
| #pragma push_macro("ORT_EP_API_ADAPTER_HEADER_INCLUDED") | ||
fs-eire marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| #define ORT_EP_API_ADAPTER_HEADER_INCLUDED | ||
|
|
||
| #include "adapter/allocator.h" | ||
| #include "adapter/logging.h" | ||
| #include "adapter/ep.h" | ||
| #include "adapter/kernel_registry.h" | ||
|
|
||
| #pragma pop_macro("ORT_EP_API_ADAPTER_HEADER_INCLUDED") | ||
|
|
||
| // | ||
| // EP specific using declarations | ||
| // | ||
|
|
||
| #define EP_SPECIFIC_USING_DECLARATIONS \ | ||
| using FuncManager = onnxruntime::ep::adapter::FuncManager; \ | ||
| using KernelCreatePtrFn = onnxruntime::ep::adapter::KernelCreatePtrFn; \ | ||
| using KernelDefBuilder = onnxruntime::ep::adapter::KernelDefBuilder; \ | ||
| using KernelRegistry = onnxruntime::ep::adapter::KernelRegistry; \ | ||
| using KernelCreateInfo = onnxruntime::ep::adapter::KernelCreateInfo; \ | ||
| using BuildKernelCreateInfoFn = onnxruntime::ep::adapter::KernelCreateInfo (*)(); \ | ||
| using OpKernelInfo = onnxruntime::ep::adapter::OpKernelInfo; \ | ||
| using OpKernelContext = onnxruntime::ep::adapter::OpKernelContext; \ | ||
| using OpKernel = onnxruntime::ep::adapter::OpKernel; \ | ||
| using DataTransferManager = onnxruntime::ep::adapter::DataTransferManager; \ | ||
| namespace logging { \ | ||
| using Logger = onnxruntime::ep::adapter::Logger; \ | ||
| } | ||
|
|
||
| namespace onnxruntime { | ||
| namespace webgpu { | ||
| EP_SPECIFIC_USING_DECLARATIONS | ||
| } // namespace webgpu | ||
| namespace cuda { | ||
| EP_SPECIFIC_USING_DECLARATIONS | ||
| } // namespace cuda | ||
|
|
||
| #ifndef DISABLE_CONTRIB_OPS | ||
| namespace contrib { | ||
| namespace webgpu { | ||
| EP_SPECIFIC_USING_DECLARATIONS | ||
| } // namespace webgpu | ||
| namespace cuda { | ||
| EP_SPECIFIC_USING_DECLARATIONS | ||
| } // namespace cuda | ||
| } // namespace contrib | ||
| #endif | ||
|
|
||
| } // namespace onnxruntime | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,47 @@ | ||
| // Copyright (c) Microsoft Corporation. All rights reserved. | ||
| // Licensed under the MIT License. | ||
|
|
||
| #pragma once | ||
|
|
||
fs-eire marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| #include "core/framework/allocator.h" | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. naive question - is this header intended to be used by a plugin EP which doesn't have access to internal ORT code? if so, how can we use internal ORT headers?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No. The whole purpose of the folder This means most of the existing code can be kept as-is. For some components, we need a "reverse bridge" to support an implementation of |
||
|
|
||
| namespace onnxruntime { | ||
| namespace ep { | ||
| namespace adapter { | ||
|
|
||
| /// <summary> | ||
| /// A bridge class between the EP API OrtAllocator and an IAllocator implementation. | ||
| /// </summary> | ||
| class Allocator : public OrtAllocator { | ||
| public: | ||
| explicit Allocator(const OrtMemoryInfo* memory_info, AllocatorPtr impl) | ||
| : OrtAllocator{}, memory_info_(memory_info), impl_(impl) { | ||
| version = ORT_API_VERSION; | ||
| Alloc = AllocImpl; | ||
| Free = FreeImpl; | ||
| Info = InfoImpl; | ||
| } | ||
|
|
||
| private: | ||
| /// `OrtAllocator::Alloc` callback: recovers the adapter from `this_ptr` and forwards `size` to the wrapped allocator. | ||
| static void* ORT_API_CALL AllocImpl(OrtAllocator* this_ptr, size_t size) noexcept { | ||
| Allocator& self = *static_cast<Allocator*>(this_ptr); | ||
| return self.impl_->Alloc(size); | ||
| } | ||
|
|
||
| /// `OrtAllocator::Free` callback: recovers the adapter from `this_ptr` and hands `p` to the wrapped allocator's `Free`. | ||
| static void ORT_API_CALL FreeImpl(OrtAllocator* this_ptr, void* p) noexcept { | ||
| Allocator& self = *static_cast<Allocator*>(this_ptr); | ||
| self.impl_->Free(p); | ||
| } | ||
|
|
||
| /// `OrtAllocator::Info` callback: returns the memory-info pointer that was passed to the constructor. | ||
| static const OrtMemoryInfo* ORT_API_CALL InfoImpl(const OrtAllocator* this_ptr) noexcept { | ||
| const Allocator& self = *static_cast<const Allocator*>(this_ptr); | ||
| return self.memory_info_; | ||
| } | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just want to double check that
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. we can always extend those classes to support more functions that may be missing for CUDA. The classes are designed to be generic enough to be reused in a future migration of the CUDA EP. |
||
| const OrtMemoryInfo* memory_info_; | ||
| AllocatorPtr impl_; | ||
| }; | ||
|
|
||
| } // namespace adapter | ||
| } // namespace ep | ||
| } // namespace onnxruntime | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,54 @@ | ||
| // Copyright (c) Microsoft Corporation. All rights reserved. | ||
| // Licensed under the MIT License. | ||
|
|
||
| #pragma once | ||
|
|
||
| #if !defined(ORT_EP_API_ADAPTER_HEADER_INCLUDED) | ||
| #error "This header should not be included directly. Include ep/_pch.h instead." | ||
| #endif | ||
|
|
||
| #include "core/common/status.h" | ||
| #include "core/common/common.h" | ||
| #include "core/framework/data_transfer.h" | ||
| #include "core/framework/tensor.h" | ||
|
|
||
| namespace onnxruntime { | ||
| namespace ep { | ||
| namespace adapter { | ||
|
|
||
| /// <summary> | ||
| /// An adapter class partially implementing the facade of `onnxruntime::DataTransferManager`. | ||
| /// </summary> | ||
| struct DataTransferManager { | ||
| explicit DataTransferManager(std::unique_ptr<IDataTransfer> impl) : impl_{std::move(impl)} {} | ||
|
Check warning on line 23 in include/onnxruntime/ep/adapter/data_transfer_manager.h
|
||
|
|
||
| /// <summary> | ||
| /// Copies the contents of `src` into `dst` through the wrapped `IDataTransfer`. | ||
| /// </summary> | ||
| /// <param name="src">Tensor to copy from.</param> | ||
| /// <param name="dst">Tensor to copy into; its shape size must equal that of `src`.</param> | ||
| /// <returns> | ||
| /// FAIL when the shape sizes differ, or when there is no data transfer able to copy between | ||
| /// the two tensors' devices; otherwise the status returned by the wrapped `CopyTensor`. | ||
| /// </returns> | ||
| common::Status CopyTensor(const Tensor& src, Tensor& dst) const { | ||
| const auto src_size = src.Shape().Size(); | ||
| const auto dst_size = dst.Shape().Size(); | ||
| if (src_size != dst_size) { | ||
| return ORT_MAKE_STATUS(ONNXRUNTIME, | ||
| FAIL, | ||
| "Tensor size mismatch: source tensor size is ", | ||
| src_size, | ||
| ", destination tensor size is ", | ||
| dst_size); | ||
| } | ||
|
|
||
| // `impl_` is whatever was handed to the constructor, which does not reject a null pointer. | ||
| // Treat a missing data transfer the same as one that cannot handle this device pair. | ||
| if (impl_ != nullptr && impl_->CanCopy(src.Location().device, dst.Location().device)) { | ||
| return impl_->CopyTensor(src, dst); | ||
| } | ||
|
|
||
| return ORT_MAKE_STATUS(ONNXRUNTIME, | ||
| FAIL, | ||
| "There's no data transfer registered for copying tensors from ", | ||
| src.Location().device.ToString(), | ||
| " to ", | ||
| dst.Location().device.ToString()); | ||
| } | ||
|
|
||
| private: | ||
| ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(DataTransferManager); | ||
| std::unique_ptr<IDataTransfer> impl_; | ||
|
Check warning on line 49 in include/onnxruntime/ep/adapter/data_transfer_manager.h
|
||
| }; | ||
|
|
||
| } // namespace adapter | ||
| } // namespace ep | ||
| } // namespace onnxruntime | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,58 @@ | ||
| // Copyright (c) Microsoft Corporation. All rights reserved. | ||
| // Licensed under the MIT License. | ||
|
|
||
| #pragma once | ||
|
|
||
| #if !defined(ORT_EP_API_ADAPTER_HEADER_INCLUDED) | ||
| #error "This header should not be included directly. Include ep/_pch.h instead." | ||
| #endif | ||
|
|
||
| #include "data_transfer_manager.h" | ||
|
Check warning on line 10 in include/onnxruntime/ep/adapter/ep.h
|
||
|
|
||
| #include "core/framework/execution_provider.h" | ||
|
|
||
| namespace onnxruntime { | ||
| namespace ep { | ||
| namespace adapter { | ||
|
|
||
| /// <summary> | ||
| /// Wrapper around IExecutionProvider to expose via OrtEp. | ||
| /// </summary> | ||
| class Ep : public OrtEp { | ||
| protected: | ||
| explicit Ep(IExecutionProvider* impl, AllocatorPtr temp_space_cpu_allocator, AllocatorPtr temp_space_allocator) | ||
| : OrtEp{}, | ||
| impl_(impl), | ||
| data_transfer_manager_{impl->GetDataTransfer()}, | ||
| profiler_{impl->GetProfiler()}, | ||
| temp_space_cpu_allocator_{temp_space_cpu_allocator}, | ||
| temp_space_allocator_{temp_space_allocator} { | ||
| } | ||
|
|
||
| public: | ||
| /// Returns a non-owning pointer to the wrapped IExecutionProvider; this object keeps ownership. | ||
| IExecutionProvider* EpImpl() const noexcept { | ||
| IExecutionProvider* const provider = impl_.get(); | ||
| return provider; | ||
| } | ||
| /// Returns the data transfer manager that was built from the wrapped provider's `GetDataTransfer()` at construction. | ||
| const DataTransferManager& GetDataTransferManager() const noexcept { | ||
| const DataTransferManager& manager = data_transfer_manager_; | ||
| return manager; | ||
| } | ||
| [[nodiscard]] Status GetTempSpaceCPUAllocator(AllocatorPtr* output) const { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would prefer to keep it in the current way. There are 2 reasons:
|
||
| *output = temp_space_cpu_allocator_; | ||
| return Status::OK(); | ||
| } | ||
| /// Writes the temp-space allocator supplied at construction to `*output` and returns OK. | ||
| /// `output` is dereferenced unconditionally, so it must not be null. | ||
| [[nodiscard]] Status GetTempSpaceAllocator(AllocatorPtr* output) const { | ||
| AllocatorPtr& destination = *output; | ||
| destination = temp_space_allocator_; | ||
| return Status::OK(); | ||
| } | ||
|
|
||
| private: | ||
| std::unique_ptr<IExecutionProvider> impl_; | ||
| DataTransferManager data_transfer_manager_; | ||
| std::unique_ptr<profiling::EpProfiler> profiler_; | ||
|
Check warning on line 51 in include/onnxruntime/ep/adapter/ep.h
|
||
| AllocatorPtr temp_space_cpu_allocator_; | ||
| AllocatorPtr temp_space_allocator_; | ||
| }; | ||
|
|
||
| } // namespace adapter | ||
| } // namespace ep | ||
| } // namespace onnxruntime | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,37 @@ | ||
| // Copyright (c) Microsoft Corporation. All rights reserved. | ||
| // Licensed under the MIT License. | ||
|
|
||
| #pragma once | ||
|
|
||
| #if !defined(ORT_EP_API_ADAPTER_HEADER_INCLUDED) | ||
| #error "This header should not be included directly. Include ep/_pch.h instead." | ||
| #endif | ||
|
|
||
| #include <memory> | ||
|
|
||
| namespace onnxruntime { | ||
| namespace ep { | ||
| namespace adapter { | ||
|
|
||
| /// <summary> | ||
| /// An adapter class partially implementing the facade of `onnxruntime::KernelDef`. | ||
| /// </summary> | ||
| class KernelDef { | ||
| public: | ||
| explicit KernelDef(const OrtKernelInfo* kernel_info) : kernel_info_{kernel_info} {} | ||
|
|
||
| /// <summary> | ||
| /// Returns the operator type of the kernel, mirroring `onnxruntime::KernelDef::OpName()`. | ||
| /// This is the operator type, not the name of the individual node instance. | ||
| /// </summary> | ||
| const std::string OpName() const { | ||
| return kernel_info_.GetOperatorType(); | ||
| } | ||
|
|
||
| const std::string Domain() const { | ||
|
Check warning on line 27 in include/onnxruntime/ep/adapter/kernel_def.h
|
||
| return kernel_info_.GetOperatorDomain(); | ||
| } | ||
|
|
||
| private: | ||
| const Ort::ConstKernelInfo kernel_info_; | ||
| }; | ||
|
|
||
| } // namespace adapter | ||
| } // namespace ep | ||
| } // namespace onnxruntime | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This README is not located in the
adapters directory. Maybe move it there, or explicitly name the adapters directory. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I added a section in the README to explain the folder structure. Since there is an ongoing discussion about unifying the shared headers, I expect the current folder structure to be refactored later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the future, when we have a dedicated folder for the shared plugin EP headers, we can put everything in the
adapter directory.