Skip to content

Commit 19614f0

Browse files
authored
Avoid empty temp external initializer file creation inside the call (#25188)
### Description When EPContext model generation is enabled, some nodes may fall back to the CPU. If those CPU nodes depend on external data, ORT by default forces all external data to be embedded into the newly generated EPContext model. ORT used to create a dummy external initializer file with the maximum size threshold to force all initializer data to be dumped into the generated ONNX model file. Internally, a "./model_ext_ini.bin" file was created and then removed at the end of the call. This causes problems if multiple sessions do the same thing. This fix avoids creating the temporary empty external initializer file by adding a flag that forces all external data to be embedded into the newly generated EPContext model.
1 parent 00c20c8 commit 19614f0

File tree

4 files changed

+54
-19
lines changed

4 files changed

+54
-19
lines changed

include/onnxruntime/core/graph/model_saving_options.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ struct ModelSavingOptions {
3939
#else
4040
int64_t allocation_granularity = 4096;
4141
#endif
42+
// Force all external initializers to be embedded into the Onnx file
43+
// Used for EPContext model generation when some nodes fall back to the CPU and depend on external data
44+
bool force_embed_external_ini = false;
4245
};
4346

4447
} // namespace onnxruntime

onnxruntime/core/framework/graph_partitioner.cc

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -911,13 +911,16 @@ static Status CreateEpContextModel(const ExecutionProviders& execution_providers
911911

912912
size_t ini_size_threshold = ep_context_gen_options.output_external_initializer_size_threshold;
913913
std::filesystem::path external_ini_path = ep_context_gen_options.output_external_initializers_file_path;
914+
bool force_embed_external_ini = false;
914915
if (external_ini_path.empty()) {
915-
// Set the threshold to the max so all initializers are forced into the Onnx file
916+
// If no external ini file is specified, set force_embed_external_ini to true to avoid intermediate file creation
917+
// and force all initializers to be embedded into the Onnx file
916918
ini_size_threshold = SIZE_MAX;
917-
external_ini_path = "./model_ext_ini.bin";
919+
force_embed_external_ini = true;
918920
}
919921

920922
ModelSavingOptions model_saving_options{ini_size_threshold};
923+
model_saving_options.force_embed_external_ini = force_embed_external_ini;
921924

922925
if (saving_to_buffer) {
923926
ORT_RETURN_IF_ERROR(ep_context_model.MainGraph().Resolve());

onnxruntime/core/graph/graph.cc

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4345,7 +4345,8 @@ Status Graph::AddExternalInitializersToGraphProtoImpl(
43454345
std::vector<uint8_t> raw_data;
43464346
ORT_RETURN_IF_ERROR(utils::UnpackInitializerData(initializer, model_path, raw_data));
43474347
size_t tensor_bytes_size = raw_data.size();
4348-
if (tensor_bytes_size < model_saving_options.initializer_size_threshold) {
4348+
if (model_saving_options.force_embed_external_ini ||
4349+
tensor_bytes_size < model_saving_options.initializer_size_threshold) {
43494350
*output_proto = initializer;
43504351
// Data with size above the threshold is written into the new external initializer file
43514352
// Data with size below the threshold should be kept inside the new model file
@@ -4441,25 +4442,31 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(
44414442
const std::filesystem::path modified_external_file_path = model_file_path.parent_path() / external_file_path;
44424443
const auto& model_path = ModelPath();
44434444

4444-
// Create the external file.
4445-
std::ofstream external_stream(modified_external_file_path, std::ofstream::out | std::ofstream::binary);
4446-
auto const external_empty_pos = external_stream.tellp();
4447-
ORT_ENFORCE(external_stream.is_open(), "Failed to open for writing:", modified_external_file_path);
4445+
std::ofstream external_stream;
4446+
std::streampos external_empty_pos;
44484447
int64_t external_offset = 0;
4448+
if (!model_saving_options.force_embed_external_ini) {
4449+
// Create the external file.
4450+
external_stream.open(modified_external_file_path, std::ofstream::out | std::ofstream::binary);
4451+
external_empty_pos = external_stream.tellp();
4452+
ORT_ENFORCE(external_stream.is_open(), "Failed to open for writing:", modified_external_file_path);
4453+
}
44494454

44504455
ORT_THROW_IF_ERROR(AddExternalInitializersToGraphProtoImpl(model_path, external_file_path,
44514456
modified_external_file_path, model_saving_options,
44524457
result,
44534458
external_stream, external_offset));
44544459

4455-
if (!external_stream.flush()) {
4456-
ORT_THROW("Failed to flush file with external initializers: ", modified_external_file_path);
4457-
}
4460+
if (!model_saving_options.force_embed_external_ini) {
4461+
if (!external_stream.flush()) {
4462+
ORT_THROW("Failed to flush file with external initializers: ", modified_external_file_path);
4463+
}
44584464

4459-
// Delete if the external data file is empty
4460-
if (external_empty_pos == external_stream.tellp()) {
4461-
external_stream.close();
4462-
std::remove(modified_external_file_path.string().c_str());
4465+
// Delete if the external data file is empty
4466+
if (external_empty_pos == external_stream.tellp()) {
4467+
external_stream.close();
4468+
std::remove(modified_external_file_path.string().c_str());
4469+
}
44634470
}
44644471

44654472
return result;

onnxruntime/test/providers/qnn/qnn_ep_context_test.cc

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -748,7 +748,7 @@ TEST_F(QnnHTPBackendTests, QnnContextBinaryMultiPartitionSupport2) {
748748
QnnContextBinaryMultiPartitionTestBody(single_ep_node);
749749
}
750750

751-
void EpCtxCpuNodeWithExternalIniFileTestBody(bool expect_external_ini_file) {
751+
void EpCtxCpuNodeWithExternalIniFileTestBody(bool expect_external_ini_file, bool load_model_from_buffer = false) {
752752
ProviderOptions provider_options;
753753
provider_options["backend_type"] = "htp";
754754

@@ -787,7 +787,22 @@ void EpCtxCpuNodeWithExternalIniFileTestBody(bool expect_external_ini_file) {
787787
so.AddConfigEntry(kOrtSessionOptionsEpContextModelExternalInitializersFileName, external_ini_file.c_str());
788788
} // otherwise all initializers are in Onnx file, no external data file generated
789789

790-
Ort::Session session(*ort_env, ToPathString(model_with_ext).c_str(), so);
790+
if (load_model_from_buffer) {
791+
std::vector<char> buffer;
792+
{
793+
std::ifstream file(model_with_ext, std::ios::binary | std::ios::ate);
794+
if (!file)
795+
ORT_THROW("Error reading model");
796+
buffer.resize(narrow<size_t>(file.tellg()));
797+
file.seekg(0, std::ios::beg);
798+
if (!file.read(buffer.data(), buffer.size()))
799+
ORT_THROW("Error reading model");
800+
}
801+
so.AddConfigEntry(kOrtSessionOptionsModelExternalInitializersFileFolderPath, "./testdata/");
802+
Ort::Session session(*ort_env, buffer.data(), buffer.size(), so);
803+
} else {
804+
Ort::Session session(*ort_env, ToPathString(model_with_ext).c_str(), so);
805+
}
791806

792807
EXPECT_TRUE(std::filesystem::exists(ep_context_model_file.c_str()));
793808
if (expect_external_ini_file) {
@@ -803,18 +818,25 @@ void EpCtxCpuNodeWithExternalIniFileTestBody(bool expect_external_ini_file) {
803818
CleanUpCtxFile(ep_context_model_file);
804819
}
805820

806-
// Set the external initializer size threshold to 1024 so FusedMatMul (which fallback on CPU)
821+
// Set the session option "ep.context_model_external_initializers_file_name" so FusedMatMul (which falls back to CPU)
807822
// will dump initializer data to external file
808823
TEST_F(QnnHTPBackendTests, QnnContextBinaryCpuNodeWithExternalWeights) {
809824
EpCtxCpuNodeWithExternalIniFileTestBody(true);
810825
}
811826

812-
// Use the default external initializer size threshold (1024000) so FusedMatMul (which fallback on CPU)
813-
// will NOT dump initializer data to external file
827+
// Without setting the session option "ep.context_model_external_initializers_file_name"
828+
// so FusedMatMul (which falls back to CPU) will NOT dump initializer data to external file
814829
TEST_F(QnnHTPBackendTests, QnnContextBinaryCpuNodeWithoutExternalWeights) {
815830
EpCtxCpuNodeWithExternalIniFileTestBody(false);
816831
}
817832

833+
// Load model from memory
834+
// Without setting the session option "ep.context_model_external_initializers_file_name"
835+
// so FusedMatMul (which falls back to CPU) will NOT dump initializer data to external file
836+
TEST_F(QnnHTPBackendTests, QnnContextBinaryCpuNodeWithoutExternalWeightsModelFromMemory) {
837+
EpCtxCpuNodeWithExternalIniFileTestBody(false, true);
838+
}
839+
818840
// Set ep.context_file_path to folder path which is not a valid option, check the error message
819841
TEST_F(QnnHTPBackendTests, QnnContextBinaryGenerationFolderPathNotExpected) {
820842
ProviderOptions provider_options;

0 commit comments

Comments
 (0)