Commit c5644ad

Add documentation for compile API
1 parent 3b60c3c commit c5644ad


docs/execution-providers/EP-Context-Design.md

Lines changed: 313 additions & 0 deletions
@@ -379,4 +379,317 @@ To use the dumped EPContext models with weight sharing enabled, ONNX Runtime inf
session1.run(...);
session2.run(...);
```

## Compile API

ORT 1.22 introduced an explicit [model compilation API](https://github.com/microsoft/onnxruntime/blob/a5ba2ba3998820dd8da111c90c420479aac7a11e/onnxruntime/python/onnxruntime_inference_collection.py#L680-L709) that enables additional compilation options:

- Read the input model from a file or a buffer (a brief buffer-based sketch follows this list).
- Write the output model to a file, a buffer, or an output stream.
- Provide a callback function to specify the location of each ONNX initializer in the output model.
- Set compilation flags: "error if no nodes compiled", "error if output file already exists", etc.

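The examples below compile from a file. As a quick illustration of the buffer-based options, the following minimal sketch compiles a model that is already held in memory. It assumes the `ort.ModelCompiler` constructor accepts model bytes in place of a file path and that a `compile_to_bytes()` method is available; see the `ModelCompiler` references below for the exact signatures.

```python
import onnxruntime as ort

# Minimal sketch (assumed API shape): compile an in-memory model and receive the
# compiled model back as bytes. EP registration/selection is omitted here; see the
# full examples below for how to configure the EP via SessionOptions.
with open("input_model.onnx", "rb") as f:
    input_model_bytes = f.read()

session_options = ort.SessionOptions()
# ... select/register the EP to compile for (see the examples below) ...

model_compiler = ort.ModelCompiler(
    session_options,
    input_model_bytes,  # input model provided as a buffer instead of a file path
    embed_compiled_data_into_model=True,
)
output_model_bytes = model_compiler.compile_to_bytes()  # output model returned as a buffer
```
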
### Usage example: compiling a model (from file) to an output stream

```python
import os

import onnxruntime as ort

"""
Compile a model (from file) to an output stream using a custom write function.
The custom write function just saves the output model to disk.
A custom initializer handler stores "large" initializers into an external file.
"""
input_model_path = "input_model.onnx"
output_model_path = "output_model.onnx"
output_initializer_file_path = "output_model.bin"

with open(output_model_path, "wb") as output_model_fd, \
     open(output_initializer_file_path, "wb") as output_initializer_fd:

    # Custom function that ORT calls (one or more times) to stream out the model bytes in chunks.
    # This example function simply writes the output model to a file.
    def output_model_write_func(buffer: bytes):
        output_model_fd.write(buffer)

    # Custom function that ORT calls to determine where to store each ONNX initializer in the output model.
    #
    # Note: the `external_info` argument denotes the location of the initializer in the original input model.
    # An implementation may choose to directly return the received `external_info` to use the same external weights.
    def output_model_onnx_initializer_handler(
        initializer_name: str,
        initializer_value: ort.OrtValue,
        external_info: ort.OrtExternalInitializerInfo | None,
    ) -> ort.OrtExternalInitializerInfo | None:
        byte_size = initializer_value.tensor_size_in_bytes()

        if byte_size < 64:
            return None  # Store small initializers within the output model.

        # Otherwise, write the initializer to a new external file and return its location to ORT.
        value_np = initializer_value.numpy()
        file_offset = output_initializer_fd.tell()
        output_initializer_fd.write(value_np.tobytes())
        return ort.OrtExternalInitializerInfo(output_initializer_file_path, file_offset, byte_size)

    session_options = ort.SessionOptions()

    # Set the EP to use in this session.
    #
    # Example for a plugin EP:
    # ep_devices = ort.get_ep_devices()
    # selected_ep_device = next((ep_device for ep_device in ep_devices if ep_device.ep_name == "SomeEp"), None)
    #
    # ep_options = {}
    # session_options.add_provider_for_devices([selected_ep_device], ep_options)
    #
    # Example for a legacy "provider-bridge" EP:
    # ep_options = {}
    # session_options.add_provider("SomeEp", ep_options)

    # Compile the model
    model_compiler = ort.ModelCompiler(
        session_options,
        input_model_path,
        embed_compiled_data_into_model=True,
        get_initializer_location_func=output_model_onnx_initializer_handler,
    )
    model_compiler.compile_to_stream(output_model_write_func)

assert os.path.exists(output_model_path)
```

The above snippet stores the output model's ONNX initializers in a new external file. To keep initializers in the same external file used by the original model, return the `external_info` argument from the `output_model_onnx_initializer_handler` function:

```python
def output_model_onnx_initializer_handler(
    initializer_name: str,
    initializer_value: ort.OrtValue,
    external_info: ort.OrtExternalInitializerInfo | None,
) -> ort.OrtExternalInitializerInfo | None:
    # The `external_info` argument denotes the location of the initializer in the original input model (if not None).
    # Return it directly to use the same external initializer file.
    return external_info

# ...
```

#### References
- [Additional Python usage examples in unit tests](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/test/python/onnxruntime_test_python_compile_api.py)
- [Python ModelCompiler class](https://github.com/microsoft/onnxruntime/blob/a5ba2ba3998820dd8da111c90c420479aac7a11e/onnxruntime/python/onnxruntime_inference_collection.py#L680-L709)
- [C++ API functions](https://github.com/microsoft/onnxruntime/blob/879ec0392ad5128968440a4e5b5a0bb742494ae5/include/onnxruntime/core/session/onnxruntime_cxx_api.h#L1617-L1623)
- [C API functions](https://github.com/microsoft/onnxruntime/blob/879ec0392ad5128968440a4e5b5a0bb742494ae5/include/onnxruntime/core/session/onnxruntime_c_api.h#L7751-L7774)

### Usage example: cross-compilation with a plugin EP

By default, ONNX Runtime only allows the use of [plugin EPs](./plugin-ep-libraries.md) that are compatible with real hardware devices discovered by ONNX Runtime. To support the creation of compiled models targeted at hardware devices not present on the compiling machine (i.e., cross-compiling), a plugin EP may be allowed to create virtual hardware devices that an application can use to compile models.

#### Application code

An application grants a plugin EP library permission to create virtual hardware devices by using a library registration name that ends in the ".virtual" suffix:

```python
import onnxruntime as ort
import onnxruntime_ep_contoso_ai as contoso_ep

# An application uses a registration name that ends in ".virtual" to signal that virtual devices are allowed.
ep_lib_registration_name = "contoso_ep_lib.virtual"
ort.register_execution_provider_library(ep_lib_registration_name, contoso_ep.get_library_path())

# Set the EP to use for compilation
ep_devices = ort.get_ep_devices()
selected_ep_device = next((ep_device for ep_device in ep_devices if ep_device.ep_name == contoso_ep.get_ep_names()[0]), None)
assert selected_ep_device.device.metadata["is_virtual"] == "1"

ep_options = {}  # EP-specific options
session_options = ort.SessionOptions()
session_options.add_provider_for_devices([selected_ep_device], ep_options)

# Compile the model
model_compiler = ort.ModelCompiler(
    session_options,
    "input_model.onnx",
    # ... other options ...
)
model_compiler.compile_to_file("output_model.onnx")

# Unregister the library using the same registration name specified earlier.
# Only unregister a library after all `ModelCompiler` objects that use the library have been released.
del model_compiler
ort.unregister_execution_provider_library(ep_lib_registration_name)
```

#### Plugin EP library code

A plugin EP library determines whether the creation of virtual devices is allowed by checking if the "allow_virtual_devices" environment configuration entry is set to "1". The following snippet from a [reference EP implementation](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/test/autoep/library/example_plugin_ep_virt_gpu/ep_lib_entry.cc) shows how a plugin EP library could check environment configuration entries within the library's exported `CreateEpFactories` function.

```c++
#include "core/session/onnxruntime_env_config_keys.h"
#define ORT_API_MANUAL_INIT
#include "onnxruntime_cxx_api.h"
#undef ORT_API_MANUAL_INIT

// Other includes ...

extern "C" {
EXPORT_SYMBOL OrtStatus* CreateEpFactories(const char* /*registration_name*/, const OrtApiBase* ort_api_base,
                                           const OrtLogger* default_logger,
                                           OrtEpFactory** factories, size_t max_factories, size_t* num_factories) {
  EXCEPTION_TO_RETURNED_STATUS_BEGIN
  const OrtApi* ort_api = ort_api_base->GetApi(ORT_API_VERSION);
  const OrtEpApi* ep_api = ort_api->GetEpApi();
  const OrtModelEditorApi* model_editor_api = ort_api->GetModelEditorApi();

  // Manual init for the C++ API
  Ort::InitApi(ort_api);

  if (max_factories < 1) {
    return ort_api->CreateStatus(ORT_INVALID_ARGUMENT,
                                 "Not enough space to return EP factory. Need at least one.");
  }

  Ort::KeyValuePairs env_configs = Ort::GetEnvConfigEntries();  // Wraps OrtEpApi::GetEnvConfigEntries()

  // Extract a config that determines whether creating virtual hardware devices is allowed.
  // An application can allow an EP library to create virtual devices in two ways:
  //   1. Use an EP library registration name that ends in the suffix ".virtual". If so, ORT will automatically
  //      set the config key "allow_virtual_devices" to "1" in the environment.
  //   2. Directly set the config key "allow_virtual_devices" to "1" when creating the
  //      OrtEnv via OrtApi::CreateEnvWithOptions().
  const char* config_value = env_configs.GetValue(kOrtEnvAllowVirtualDevices);
  const bool allow_virtual_devices = config_value != nullptr && strcmp(config_value, "1") == 0;

  std::unique_ptr<OrtEpFactory> factory = std::make_unique<EpFactoryVirtualGpu>(*ort_api, *ep_api, *model_editor_api,
                                                                                allow_virtual_devices, *default_logger);

  factories[0] = factory.release();
  *num_factories = 1;

  return nullptr;
  EXCEPTION_TO_RETURNED_STATUS_END
}

// ...

}  // extern "C"
```

An EP factory's `OrtEpFactory::GetSupportedDevices()` function may then use `OrtEpApi::CreateHardwareDevice()` to create a virtual hardware device.

```c++
#include "core/session/onnxruntime_ep_device_ep_metadata_keys.h"
// Other includes ...

/*static*/
OrtStatus* ORT_API_CALL EpFactoryVirtualGpu::GetSupportedDevicesImpl(OrtEpFactory* this_ptr,
                                                                     const OrtHardwareDevice* const* /*devices*/,
                                                                     size_t /*num_devices*/,
                                                                     OrtEpDevice** ep_devices,
                                                                     size_t max_ep_devices,
                                                                     size_t* p_num_ep_devices) noexcept {
  size_t& num_ep_devices = *p_num_ep_devices;
  auto* factory = static_cast<EpFactoryVirtualGpu*>(this_ptr);

  num_ep_devices = 0;

  // Create a virtual OrtHardwareDevice if the application indicated it is allowed (e.g., for cross-compiling).
  // This example EP creates a virtual GPU OrtHardwareDevice and adds a new OrtEpDevice that uses the virtual GPU.
  if (factory->allow_virtual_devices_ && num_ep_devices < max_ep_devices) {
    // A virtual hardware device should have a metadata entry "is_virtual" set to "1".
    OrtKeyValuePairs* hw_metadata = nullptr;
    factory->ort_api_.CreateKeyValuePairs(&hw_metadata);
    factory->ort_api_.AddKeyValuePair(hw_metadata, kOrtHardwareDevice_MetadataKey_IsVirtual, "1");

    auto* status = factory->ep_api_.CreateHardwareDevice(OrtHardwareDeviceType::OrtHardwareDeviceType_GPU,
                                                         factory->vendor_id_,
                                                         /*device_id*/ 0,
                                                         factory->vendor_.c_str(),
                                                         hw_metadata,
                                                         &factory->virtual_hw_device_);
    factory->ort_api_.ReleaseKeyValuePairs(hw_metadata);  // Release since ORT makes a copy.

    if (status != nullptr) {
      return status;
    }

    OrtKeyValuePairs* ep_metadata = nullptr;
    OrtKeyValuePairs* ep_options = nullptr;
    factory->ort_api_.CreateKeyValuePairs(&ep_metadata);
    factory->ort_api_.CreateKeyValuePairs(&ep_options);

    // Made-up example metadata values.
    factory->ort_api_.AddKeyValuePair(ep_metadata, "some_metadata", "1");
    factory->ort_api_.AddKeyValuePair(ep_options, "compile_optimization", "O3");

    OrtEpDevice* virtual_ep_device = nullptr;
    status = factory->ort_api_.GetEpApi()->CreateEpDevice(factory, factory->virtual_hw_device_, ep_metadata,
                                                          ep_options, &virtual_ep_device);

    factory->ort_api_.ReleaseKeyValuePairs(ep_metadata);
    factory->ort_api_.ReleaseKeyValuePairs(ep_options);

    if (status != nullptr) {
      return status;
    }

    ep_devices[num_ep_devices++] = virtual_ep_device;
  }

  return nullptr;
}
```

#### References
- [Reference example plugin EP with virtual GPU](https://github.com/microsoft/onnxruntime/tree/main/onnxruntime/test/autoep/library/example_plugin_ep_virt_gpu)
- [OrtEpApi::GetEnvConfigEntries C API function](https://github.com/microsoft/onnxruntime/blob/990ba5f0c3e0c8735fec8bf89dd11953224a9c03/include/onnxruntime/core/session/onnxruntime_ep_c_api.h#L1431-L1446)
- [Ort::GetEnvConfigEntries C++ API function](https://github.com/microsoft/onnxruntime/blob/990ba5f0c3e0c8735fec8bf89dd11953224a9c03/include/onnxruntime/core/session/onnxruntime_cxx_api.h#L3531-L3532)
- [Plugin EP library documentation](./plugin-ep-libraries.md)
- [Additional Python usage examples in unit tests](https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/test/python/onnxruntime_test_python_compile_api.py)
- [Python ModelCompiler class](https://github.com/microsoft/onnxruntime/blob/a5ba2ba3998820dd8da111c90c420479aac7a11e/onnxruntime/python/onnxruntime_inference_collection.py#L680-L709)

### Usage example: EPContext weight sharing with plugin EPs

The compile API also supports [EPContext resource/weight sharing](./EP-Context-Design.md#epcontext-with-weight-sharing) with plugin EPs.

```python
import onnxruntime as ort
import onnxruntime_ep_contoso_ai as contoso_ep

ep_lib_registration_name = "contoso_ep_lib"
ort.register_execution_provider_library(ep_lib_registration_name, contoso_ep.get_library_path())

# The models that share resources
input_models = ["input_model_0.onnx", "input_model_1.onnx"]
output_models = ["output_model_0.onnx", "output_model_1.onnx"]
num_models = len(input_models)

# Set the EP to use for compilation
ep_devices = ort.get_ep_devices()
selected_ep_device = next((ep_device for ep_device in ep_devices if ep_device.ep_name == contoso_ep.get_ep_names()[0]), None)
assert selected_ep_device is not None

ep_options = {}  # EP-specific options
session_options = ort.SessionOptions()
session_options.add_provider_for_devices([selected_ep_device], ep_options)

# Set option that tells the EP to share resources (e.g., weights) across sessions.
session_options.add_session_config_entry("ep.share_ep_contexts", "1")

# Compile individual models
for i in range(num_models):
    if i == num_models - 1:
        # Tell the EP that this is the last compiling session that will be sharing resources.
        session_options.add_session_config_entry("ep.stop_share_ep_contexts", "1")

    model_compiler = ort.ModelCompiler(
        session_options,
        input_models[i],
        # ... other options ...
    )
    model_compiler.compile_to_file(output_models[i])

# Unregister the library using the same registration name specified earlier.
# Only unregister a library after all `ModelCompiler` objects that use the library have been released.
del model_compiler
ort.unregister_execution_provider_library(ep_lib_registration_name)
```
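
When the compiled models are later run, the inference sessions can share the EP resources in the same way, as described in the weight-sharing section earlier in this document. The following is a minimal sketch, assuming the plugin EP library is still registered and that `selected_ep_device`, `ep_options`, and `output_models` are set up as in the snippet above:

```python
# Minimal sketch: run the compiled models with shared EP resources.
# Assumes the plugin EP library has not yet been unregistered and reuses
# `selected_ep_device`, `ep_options`, and `output_models` from the compilation snippet above.
inference_options = ort.SessionOptions()
inference_options.add_provider_for_devices([selected_ep_device], ep_options)
inference_options.add_session_config_entry("ep.share_ep_contexts", "1")

session1 = ort.InferenceSession(output_models[0], sess_options=inference_options)
session2 = ort.InferenceSession(output_models[1], sess_options=inference_options)

# session1.run(...)
# session2.run(...)
```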
