diff --git a/include/onnxruntime/core/graph/graph.h b/include/onnxruntime/core/graph/graph.h
index 58473a79ddaa6..9d006d258c36c 100644
--- a/include/onnxruntime/core/graph/graph.h
+++ b/include/onnxruntime/core/graph/graph.h
@@ -1461,12 +1461,23 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
return Resolve(default_options);
}
+ ///
+ /// This function converts all the graph TensorProto initializers into OrtValues
+ /// and creates a in-memory external data reference for each OrtValue. It validates external paths data references.
+ ///
+ ///
+ ///
+ Status ConvertInitializersIntoOrtValues(gsl::span whitelisted_external_paths);
+
///
/// This function converts all the graph TensorProto initializers into OrtValues
/// and creates a in-memory external data reference for each OrtValue.
+ /// External data paths are restricted to the model directory.
///
///
- Status ConvertInitializersIntoOrtValues();
+ Status ConvertInitializersIntoOrtValues() {
+ return ConvertInitializersIntoOrtValues(gsl::span());
+ }
/**
* @brief This function examines the specified initializers in the graph and converts them inline
diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h
index 77c2ff795e800..7a19a06370f27 100644
--- a/include/onnxruntime/core/session/onnxruntime_c_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -7221,6 +7221,23 @@ struct OrtApi {
_Outptr_result_maybenull_ const int64_t** shape_data,
_Out_ size_t* shape_data_count);
+ /** \brief Set whitelisted data folders for external data loading.
+ *
+ * Sets a semicolon-separated list of absolute directory paths that are allowed as sources
+ * for external data. Each path must be an absolute path to an existing directory and must not
+ * be a symbolic link.
+ *
+ * \param[in] options Session options instance.
+ * \param[in] whitelisted_data_folders Semicolon-separated list of absolute directory paths, or
+ * an empty string to clear the whitelist. This pointer must not be NULL.
+ *
+ * \return nullptr on success, or an OrtStatus on failure.
+ *
+ * \since Version 1.24.
+ */
+ ORT_API2_STATUS(SessionOptionsSetWhiteListedDataFolders, _Inout_ OrtSessionOptions* options,
+ _In_ const ORTCHAR_T* whitelisted_data_folders);
+
/** \brief Enable profiling for this run
*
* \param[in] options
diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h
index 2c1d52894e7f3..8eb6b6ff8326f 100644
--- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h
@@ -1558,6 +1558,8 @@ struct SessionOptionsImpl : ConstSessionOptionsImpl {
///< Wraps OrtApi::AddFreeDimensionOverrideByName
SessionOptionsImpl& AddFreeDimensionOverrideByName(const char* dim_name, int64_t dim_value);
+ ///< Wraps OrtApi::SessionOptionsSetWhiteListedDataFolders
+ SessionOptionsImpl& SetWhiteListedDataFolders(const ORTCHAR_T* whitelisted_data_folders);
};
} // namespace detail
diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
index 745128fe6c7b4..d0bfb33d78dd7 100644
--- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
+++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
@@ -1304,6 +1304,12 @@ inline SessionOptionsImpl& SessionOptionsImpl::SetLoadCancellationFlag(boo
return *this;
}
+template
+inline SessionOptionsImpl& SessionOptionsImpl::SetWhiteListedDataFolders(const ORTCHAR_T* whitelisted_data_folders) {
+ ThrowOnError(GetApi().SessionOptionsSetWhiteListedDataFolders(this->p_, whitelisted_data_folders));
+ return *this;
+}
+
template
inline SessionOptionsImpl& SessionOptionsImpl::SetLogId(const char* logid) {
ThrowOnError(GetApi().SetSessionLogId(this->p_, logid));
diff --git a/onnxruntime/core/framework/session_options.h b/onnxruntime/core/framework/session_options.h
index b328fc916f885..cd5450c6fe862 100644
--- a/onnxruntime/core/framework/session_options.h
+++ b/onnxruntime/core/framework/session_options.h
@@ -226,6 +226,10 @@ struct SessionOptions {
bool has_explicit_ep_context_gen_options = false;
epctx::ModelGenOptions ep_context_gen_options = {};
epctx::ModelGenOptions GetEpContextGenerationOptions() const;
+
+ // Semicolon-separated list of whitelisted data folder paths.
+ // Used to restrict where external data can be loaded from.
+ PathString whitelisted_data_folders;
};
inline std::ostream& operator<<(std::ostream& os, const SessionOptions& session_options) {
diff --git a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc
index e0b31c29a054b..08a27a1541db7 100644
--- a/onnxruntime/core/framework/tensorprotoutils.cc
+++ b/onnxruntime/core/framework/tensorprotoutils.cc
@@ -328,24 +328,122 @@ Status TensorProtoWithExternalDataToTensorProto(
return Status::OK();
}
+Status ParseWhiteListedPaths(const PathString& paths_str,
+ /*out*/ InlinedVector& paths) {
+ if (paths_str.empty()) {
+ paths.clear();
+ return Status::OK();
+ }
+
+ InlinedVector result;
+
+ auto process_path = [&](const PathString& p_str) -> Status {
+ if (p_str.empty()) return Status::OK();
+ std::filesystem::path path(p_str);
+ std::error_code ec;
+ if (!path.is_absolute()) {
+ return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+ "Whitelisted data path is not absolute: ", path.string());
+ }
+ // canonical() resolves all symlinks and requires the path to exist.
+ // If it fails, the path either doesn't exist or can't be resolved.
+ auto canonical_path = std::filesystem::canonical(path, ec);
+ if (ec) {
+ return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+ "Whitelisted data path does not exist or cannot be resolved: ", path.string());
+ }
+ // Walk each component of the canonical path and check for symlinks.
+ // We choose with approach because both canonical() and weakly_canonical() on Windows
+ // (MSVC's implementation) resolve symlinks for existing path components
+ // using the same underlying Win32 API (GetFinalPathNameByHandle).
+ // So comparing them always produces an equal result, making symlink detection impossible via comparison.
+ // We check the canonical path (not the original) so that normalization differences
+ // (trailing slashes, "..", ".") don't interfere, while still detecting symlinks
+ // that may exist along the resolved path.
+ {
+ auto normalized = path.lexically_normal();
+ std::filesystem::path accumulated;
+ for (const auto& component : normalized) {
+ accumulated /= component;
+ // Skip checking the root (e.g. "C:\" or "/") since is_symlink would fail or be meaningless.
+ if (accumulated == normalized.root_path()) {
+ continue;
+ }
+ if (std::filesystem::is_symlink(accumulated, ec)) {
+ return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+ "Whitelisted data path contains a symlink: ", path.string());
+ }
+ }
+ }
+
+ if (!std::filesystem::is_directory(canonical_path, ec) || ec) {
+ return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+ "Whitelisted data path is not a directory: ", path.string());
+ }
+ result.push_back(canonical_path);
+ return Status::OK();
+ };
+
+ constexpr PathChar kSemiColonSep = ORT_TSTR(';');
+
+ size_t start = 0;
+ size_t end = paths_str.find(kSemiColonSep);
+
+ while (end != PathString::npos) {
+ ORT_RETURN_IF_ERROR(process_path(paths_str.substr(start, end - start)));
+ start = end + 1;
+ end = paths_str.find(kSemiColonSep, start);
+ }
+ ORT_RETURN_IF_ERROR(process_path(paths_str.substr(start)));
+
+ paths = std::move(result);
+ return Status::OK();
+}
+
Status ValidateExternalDataPath(const std::filesystem::path& base_dir,
- const std::filesystem::path& location) {
+ const std::filesystem::path& location,
+ gsl::span whitelisted_external_folders) {
// Reject absolute paths
ORT_RETURN_IF(location.is_absolute(),
"Absolute paths not allowed for external data location");
- if (!base_dir.empty()) {
- // Resolve and verify the path stays within model directory
- auto base_canonical = std::filesystem::weakly_canonical(base_dir);
- // If the symlink exists, it resolves to the target path;
- // so if the symllink is outside the directory it would be caught here.
- auto resolved = std::filesystem::weakly_canonical(base_dir / location);
- // Check that resolved path starts with base directory
+
+ auto validate_location_under_dir = [&location](const std::filesystem::path& dir) -> bool {
+ if (dir.empty()) {
+ return false;
+ }
+ auto base_canonical = std::filesystem::weakly_canonical(dir);
+ auto resolved = std::filesystem::weakly_canonical(dir / location);
auto [base_end, resolved_it] = std::mismatch(
base_canonical.begin(), base_canonical.end(),
resolved.begin(), resolved.end());
- ORT_RETURN_IF(base_end != base_canonical.end(),
- "External data path: ", location, " escapes model directory: ", base_dir);
+ return base_end == base_canonical.end();
+ };
+
+ if (!base_dir.empty()) {
+ if (validate_location_under_dir(base_dir)) {
+ return Status::OK();
+ }
+ }
+
+ // base_dir validation failed or base_dir is empty, try whitelisted folders
+ if (!whitelisted_external_folders.empty()) {
+ for (const auto& folder : whitelisted_external_folders) {
+ if (validate_location_under_dir(folder)) {
+ return Status::OK();
+ }
+ }
+
+ return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+ "External data path: ", location,
+ " is not under any allowed directory");
}
+
+ // No whitelisted folders supplied
+ if (!base_dir.empty()) {
+ return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+ "External data path: ", location, " escapes model directory: ", base_dir);
+ }
+
return Status::OK();
}
diff --git a/onnxruntime/core/framework/tensorprotoutils.h b/onnxruntime/core/framework/tensorprotoutils.h
index 685fa65a73720..7908b08929a5a 100644
--- a/onnxruntime/core/framework/tensorprotoutils.h
+++ b/onnxruntime/core/framework/tensorprotoutils.h
@@ -525,17 +525,31 @@ Status TensorProtoWithExternalDataToTensorProto(
const std::filesystem::path& model_path,
ONNX_NAMESPACE::TensorProto& new_tensor_proto);
+///
+/// This function parses the input string which is expected to be a list of paths separated by ';'
+/// and returns a vector of std::filesystem::paths. The function also validates that each path is an absolute path of a
+/// folder, it is not a symlink and actually exists on the file system.
+///
+///
+///
+/// Status
+Status ParseWhiteListedPaths(const PathString& paths_str,
+ /*out*/ InlinedVector& paths);
+
///
/// The functions will make sure the 'location' specified in the external data is under the 'base_dir'.
/// If the `base_dir` is empty, the function only ensures that `location` is not an absolute path.
+/// If validation fails for base_dir, the function will check against whitelisted_external_folders.
///
/// model location directory
/// location is a string retrieved from TensorProto external data that is not
/// an in-memory tag
+/// additional folders where external data is allowed
/// The function will fail if the resolved full path is not under the model directory
-/// or one of the subdirectories
+/// or one of the whitelisted folders
Status ValidateExternalDataPath(const std::filesystem::path& base_dir,
- const std::filesystem::path& location);
+ const std::filesystem::path& location,
+ gsl::span whitelisted_external_folders = {});
#endif // !defined(SHARED_PROVIDER)
diff --git a/onnxruntime/core/graph/graph.cc b/onnxruntime/core/graph/graph.cc
index 779ca5d180518..47b37ac80f47c 100644
--- a/onnxruntime/core/graph/graph.cc
+++ b/onnxruntime/core/graph/graph.cc
@@ -3737,7 +3737,7 @@ Status Graph::Resolve(const ResolveOptions& options) {
return ForThisAndAllSubgraphs(all_subgraphs, finalize_func);
}
-Status Graph::ConvertInitializersIntoOrtValues() {
+Status Graph::ConvertInitializersIntoOrtValues(gsl::span whitelisted_external_paths) {
std::vector all_subgraphs;
FindAllSubgraphs(all_subgraphs);
@@ -3771,7 +3771,7 @@ Status Graph::ConvertInitializersIntoOrtValues() {
std::unique_ptr external_data_info;
ORT_RETURN_IF_ERROR(onnxruntime::ExternalDataInfo::Create(tensor_proto.external_data(), external_data_info));
const auto& location = external_data_info->GetRelPath();
- auto st = utils::ValidateExternalDataPath(model_dir, location);
+ auto st = utils::ValidateExternalDataPath(model_dir, location, whitelisted_external_paths);
if (!st.IsOK()) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
"External data path validation failed for initializer: ", tensor_proto.name(),
diff --git a/onnxruntime/core/providers/shared_library/provider_api.h b/onnxruntime/core/providers/shared_library/provider_api.h
index f5421d8540db8..1ed78c89e722d 100644
--- a/onnxruntime/core/providers/shared_library/provider_api.h
+++ b/onnxruntime/core/providers/shared_library/provider_api.h
@@ -453,11 +453,6 @@ inline bool HasExternalDataInMemory(const ONNX_NAMESPACE::TensorProto& ten_proto
return g_host->Utils__HasExternalDataInMemory(ten_proto);
}
-inline Status ValidateExternalDataPath(const std::filesystem::path& base_dir,
- const std::filesystem::path& location) {
- return g_host->Utils__ValidateExternalDataPath(base_dir, location);
-}
-
} // namespace utils
namespace graph_utils {
diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h b/onnxruntime/core/providers/shared_library/provider_interfaces.h
index aeaf05cf14591..9cbbc6234a99b 100644
--- a/onnxruntime/core/providers/shared_library/provider_interfaces.h
+++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h
@@ -1004,9 +1004,6 @@ struct ProviderHost {
virtual bool Utils__HasExternalDataInMemory(const ONNX_NAMESPACE::TensorProto& ten_proto) = 0;
- virtual Status Utils__ValidateExternalDataPath(const std::filesystem::path& base_path,
- const std::filesystem::path& location) = 0;
-
// Model
virtual std::unique_ptr Model__construct(ONNX_NAMESPACE::ModelProto&& model_proto, const PathString& model_path,
const IOnnxRuntimeOpSchemaRegistryList* local_registries,
diff --git a/onnxruntime/core/session/abi_session_options.cc b/onnxruntime/core/session/abi_session_options.cc
index 3df6d37d63794..7c1d4b558bf9c 100644
--- a/onnxruntime/core/session/abi_session_options.cc
+++ b/onnxruntime/core/session/abi_session_options.cc
@@ -413,3 +413,14 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsSetLoadCancellationFlag, _Inout_ OrtS
return nullptr;
API_IMPL_END
}
+
+ORT_API_STATUS_IMPL(OrtApis::SessionOptionsSetWhiteListedDataFolders, _Inout_ OrtSessionOptions* options,
+ _In_ const ORTCHAR_T* whitelisted_data_folders) {
+ API_IMPL_BEGIN
+ if (whitelisted_data_folders == nullptr) {
+ return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "Input whitelisted_data_folders is nullptr");
+ }
+ options->value.whitelisted_data_folders = whitelisted_data_folders;
+ return nullptr;
+ API_IMPL_END
+}
diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc
index c00d63d0be8a2..e14942b8f5b39 100644
--- a/onnxruntime/core/session/inference_session.cc
+++ b/onnxruntime/core/session/inference_session.cc
@@ -1390,7 +1390,10 @@ common::Status InferenceSession::TransformGraph(onnxruntime::Graph& graph, bool
// auto tensor_proto_to_add = utils::TensorToTensorProto(ort_value.Get(), tensor_proto.name(),
// use_tensor_buffer_true);
// ORT_RETURN_IF_ERROR(graph.ReplaceInitializedTensor(tensor_proto_to_add, ort_value));
- ORT_RETURN_IF_ERROR_SESSIONID_(graph.ConvertInitializersIntoOrtValues());
+ InlinedVector whitelisted_external_data_folders;
+ ORT_RETURN_IF_ERROR_SESSIONID_(utils::ParseWhiteListedPaths(session_options_.whitelisted_data_folders,
+ whitelisted_external_data_folders));
+ ORT_RETURN_IF_ERROR_SESSIONID_(graph.ConvertInitializersIntoOrtValues(whitelisted_external_data_folders));
auto apply_transformer_once = [](const GraphTransformer& transformer, const logging::Logger& logger,
Graph& graph, bool* is_graph_modified = nullptr) -> onnxruntime::common::Status {
diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc
index 7a027c8eafb81..014039d314fdd 100644
--- a/onnxruntime/core/session/onnxruntime_c_api.cc
+++ b/onnxruntime/core/session/onnxruntime_c_api.cc
@@ -4803,6 +4803,7 @@ static constexpr OrtApi ort_api_1_to_25 = {
&OrtApis::EpAssignedNode_GetOperatorType,
&OrtApis::RunOptionsSetSyncStream,
&OrtApis::GetTensorElementTypeAndShapeDataReference,
+ &OrtApis::SessionOptionsSetWhiteListedDataFolders,
// End of Version 24 - DO NOT MODIFY ABOVE (see above text for more information)
&OrtApis::RunOptionsEnableProfiling,
@@ -4843,7 +4844,7 @@ static_assert(offsetof(OrtApi, SetEpDynamicOptions) / sizeof(void*) == 284, "Siz
static_assert(offsetof(OrtApi, GetEpApi) / sizeof(void*) == 317, "Size of version 22 API cannot change");
static_assert(offsetof(OrtApi, CreateExternalInitializerInfo) / sizeof(void*) == 389, "Size of version 23 API cannot change");
-static_assert(offsetof(OrtApi, GetTensorElementTypeAndShapeDataReference) / sizeof(void*) == 414, "Size of version 24 API cannot change");
+static_assert(offsetof(OrtApi, SessionOptionsSetWhiteListedDataFolders) / sizeof(void*) == 415, "Size of version 24 API cannot change");
// So that nobody forgets to finish an API version, this check will serve as a reminder:
static_assert(std::string_view(ORT_VERSION) == "1.25.0",
diff --git a/onnxruntime/core/session/ort_apis.h b/onnxruntime/core/session/ort_apis.h
index 3d990909cfb41..8edd396d8b7ca 100644
--- a/onnxruntime/core/session/ort_apis.h
+++ b/onnxruntime/core/session/ort_apis.h
@@ -78,7 +78,8 @@ ORT_API_STATUS_IMPL(CreateCustomOpDomain, _In_ const char* domain, _Outptr_ OrtC
ORT_API_STATUS_IMPL(CustomOpDomain_Add, _Inout_ OrtCustomOpDomain* custom_op_domain, _In_ const OrtCustomOp* op);
ORT_API_STATUS_IMPL(AddCustomOpDomain, _Inout_ OrtSessionOptions* options, _In_ OrtCustomOpDomain* custom_op_domain);
ORT_API_STATUS_IMPL(RegisterCustomOpsLibrary, _Inout_ OrtSessionOptions* options, _In_ const char* library_path, _Outptr_ void** library_handle);
-
+ORT_API_STATUS_IMPL(SessionOptionsSetWhiteListedDataFolders, _Inout_ OrtSessionOptions* options,
+ _In_ const ORTCHAR_T* whitelisted_data_folders);
ORT_API_STATUS_IMPL(SessionGetInputCount, _In_ const OrtSession* sess, _Out_ size_t* out);
ORT_API_STATUS_IMPL(SessionGetOutputCount, _In_ const OrtSession* sess, _Out_ size_t* out);
ORT_API_STATUS_IMPL(SessionGetOverridableInitializerCount, _In_ const OrtSession* sess, _Out_ size_t* out);
diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc
index 6949ed0059add..e5bbd656bc325 100644
--- a/onnxruntime/core/session/provider_bridge_ort.cc
+++ b/onnxruntime/core/session/provider_bridge_ort.cc
@@ -1295,11 +1295,6 @@ struct ProviderHostImpl : ProviderHost {
return onnxruntime::utils::HasExternalDataInMemory(ten_proto);
}
- Status Utils__ValidateExternalDataPath(const std::filesystem::path& base_path,
- const std::filesystem::path& location) override {
- return onnxruntime::utils::ValidateExternalDataPath(base_path, location);
- }
-
// Model (wrapped)
std::unique_ptr Model__construct(ONNX_NAMESPACE::ModelProto&& model_proto, const PathString& model_path,
const IOnnxRuntimeOpSchemaRegistryList* local_registries,
diff --git a/onnxruntime/test/framework/tensorutils_test.cc b/onnxruntime/test/framework/tensorutils_test.cc
index 0d7b583faf27b..4191d7a9a219b 100644
--- a/onnxruntime/test/framework/tensorutils_test.cc
+++ b/onnxruntime/test/framework/tensorutils_test.cc
@@ -511,18 +511,22 @@ class PathValidationTest : public ::testing::Test {
// Create a temporary directory for the tests.
base_dir_ = std::filesystem::temp_directory_path() / "PathValidationTest";
outside_dir_ = std::filesystem::temp_directory_path() / "outside";
+ whitelisted_dir_ = std::filesystem::temp_directory_path() / "whitelisted";
std::filesystem::create_directories(base_dir_);
std::filesystem::create_directories(outside_dir_);
+ std::filesystem::create_directories(whitelisted_dir_);
}
void TearDown() override {
// Clean up the temporary directory.
std::filesystem::remove_all(base_dir_);
std::filesystem::remove_all(outside_dir_);
+ std::filesystem::remove_all(whitelisted_dir_);
}
std::filesystem::path base_dir_;
std::filesystem::path outside_dir_;
+ std::filesystem::path whitelisted_dir_;
};
// Test cases for ValidateExternalDataPath.
@@ -586,5 +590,272 @@ TEST_F(PathValidationTest, ValidateExternalDataPathWithSymlinkOutside) {
ASSERT_FALSE(utils::ValidateExternalDataPath(base_dir_, "outside_link.bin").IsOK());
}
+TEST_F(PathValidationTest, ValidateExternalDataPathWithWhitelistedFolder) {
+ // Path is valid under the whitelisted folder directly.
+ std::vector whitelist = {outside_dir_};
+ ASSERT_STATUS_OK(utils::ValidateExternalDataPath(outside_dir_, "data.bin", whitelist));
+}
+
+TEST_F(PathValidationTest, ValidateExternalDataPathEscapesBaseButMatchesWhitelist) {
+ std::vector whitelist = {whitelisted_dir_};
+ // "data.bin" is valid under base_dir_, no need for whitelist
+ ASSERT_STATUS_OK(utils::ValidateExternalDataPath(base_dir_, "data.bin", whitelist));
+
+ // Create a subdirectory of whitelisted_dir_ and use that as the whitelisted folder.
+ auto whitelisted_sub = whitelisted_dir_ / "sub";
+ std::filesystem::create_directories(whitelisted_sub);
+ std::vector whitelist2 = {whitelisted_sub};
+
+ // "../data.bin" escapes base_dir_ and also escapes whitelisted_sub
+ ASSERT_FALSE(utils::ValidateExternalDataPath(base_dir_, "../data.bin").IsOK());
+ ASSERT_FALSE(utils::ValidateExternalDataPath(base_dir_, "../data.bin", whitelist2).IsOK());
+}
+
+TEST_F(PathValidationTest, ValidateExternalDataPathWhitelistSavesEscapingPath) {
+ // Location "../outside/data.bin" escapes base_dir_ but resolves under outside_dir_.
+ auto relative_to_outside = std::filesystem::path("..") / "outside" / "data.bin";
+ std::vector whitelist = {outside_dir_};
+
+ // Without whitelist, it should fail.
+ ASSERT_FALSE(utils::ValidateExternalDataPath(base_dir_, relative_to_outside).IsOK());
+
+ // With whitelist containing outside_dir_, it should succeed because
+ // outside_dir_ / "../outside/data.bin" resolves under outside_dir_.
+ ASSERT_STATUS_OK(utils::ValidateExternalDataPath(base_dir_, relative_to_outside, whitelist));
+}
+
+TEST_F(PathValidationTest, ValidateExternalDataPathWhitelistDoesNotMatchEither) {
+ // Location escapes both base_dir and all whitelisted folders.
+ auto unrelated_dir = std::filesystem::temp_directory_path() / "unrelated_PathValidationTest";
+ std::filesystem::create_directories(unrelated_dir);
+ auto cleanup = [&]() { std::filesystem::remove_all(unrelated_dir); };
+
+ std::vector whitelist = {whitelisted_dir_};
+ auto escaping_location = std::filesystem::path("..") / "unrelated_PathValidationTest" / "data.bin";
+ ASSERT_FALSE(utils::ValidateExternalDataPath(base_dir_, escaping_location, whitelist).IsOK());
+
+ cleanup();
+}
+
+TEST_F(PathValidationTest, ValidateExternalDataPathEmptyWhitelist) {
+ // Empty whitelist should behave the same as no whitelist.
+ std::vector empty_whitelist;
+ ASSERT_STATUS_OK(utils::ValidateExternalDataPath(base_dir_, "data.bin", empty_whitelist));
+ ASSERT_FALSE(utils::ValidateExternalDataPath(base_dir_, "../data.bin", empty_whitelist).IsOK());
+}
+
+TEST_F(PathValidationTest, ValidateExternalDataPathMultipleWhitelistedFolders) {
+ // First whitelisted folder doesn't match, second one does.
+ auto another_dir = std::filesystem::temp_directory_path() / "another_PathValidationTest";
+ std::filesystem::create_directories(another_dir);
+ auto cleanup = [&]() { std::filesystem::remove_all(another_dir); };
+
+ auto relative_to_outside = std::filesystem::path("..") / "outside" / "data.bin";
+ std::vector whitelist = {another_dir, outside_dir_};
+
+ // Escapes base_dir_ but outside_dir_ (second whitelist entry) should match.
+ ASSERT_STATUS_OK(utils::ValidateExternalDataPath(base_dir_, relative_to_outside, whitelist));
+
+ cleanup();
+}
+
+TEST_F(PathValidationTest, ValidateExternalDataPathAbsoluteLocationRejectsEvenWithWhitelist) {
+ // Absolute paths are always rejected, regardless of whitelist.
+ std::vector whitelist = {outside_dir_};
+#ifdef _WIN32
+ ASSERT_FALSE(utils::ValidateExternalDataPath(base_dir_, "C:\\data.bin", whitelist).IsOK());
+#else
+ ASSERT_FALSE(utils::ValidateExternalDataPath(base_dir_, "/data.bin", whitelist).IsOK());
+#endif
+}
+
+// Test fixture for ParseWhiteListedPaths tests.
+class ParseWhiteListedPathsTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ test_dir_ = std::filesystem::temp_directory_path() / "ParseWhiteListedPathsTest";
+ sub_dir_a_ = test_dir_ / "dir_a";
+ sub_dir_b_ = test_dir_ / "dir_b";
+ std::filesystem::create_directories(sub_dir_a_);
+ std::filesystem::create_directories(sub_dir_b_);
+
+ // Canonicalize the paths so that tests can compare against what ParseWhiteListedPaths stores.
+ test_dir_ = std::filesystem::canonical(test_dir_);
+ sub_dir_a_ = std::filesystem::canonical(sub_dir_a_);
+ sub_dir_b_ = std::filesystem::canonical(sub_dir_b_);
+
+ // Create a regular file (not a directory)
+ regular_file_ = test_dir_ / "file.txt";
+ std::ofstream{regular_file_};
+ }
+
+ void TearDown() override {
+ std::filesystem::remove_all(test_dir_);
+ }
+
+ PathString ToOrtPath(const std::filesystem::path& p) {
+#ifdef _WIN32
+ return p.wstring();
+#else
+ return p.string();
+#endif
+ }
+
+ std::filesystem::path test_dir_;
+ std::filesystem::path sub_dir_a_;
+ std::filesystem::path sub_dir_b_;
+ std::filesystem::path regular_file_;
+};
+
+TEST_F(ParseWhiteListedPathsTest, EmptyStringReturnsOkAndEmptyVector) {
+ InlinedVector paths;
+ ASSERT_STATUS_OK(utils::ParseWhiteListedPaths(PathString(), paths));
+ EXPECT_TRUE(paths.empty());
+}
+
+TEST_F(ParseWhiteListedPathsTest, SingleValidAbsoluteDirectory) {
+ InlinedVector paths;
+ ASSERT_STATUS_OK(utils::ParseWhiteListedPaths(ToOrtPath(sub_dir_a_), paths));
+ ASSERT_EQ(paths.size(), 1u);
+ EXPECT_EQ(paths[0], sub_dir_a_);
+}
+
+TEST_F(ParseWhiteListedPathsTest, MultipleValidAbsoluteDirectories) {
+ PathString combined = ToOrtPath(sub_dir_a_) + ORT_TSTR(';') + ToOrtPath(sub_dir_b_);
+ InlinedVector paths;
+ ASSERT_STATUS_OK(utils::ParseWhiteListedPaths(combined, paths));
+ ASSERT_EQ(paths.size(), 2u);
+ EXPECT_EQ(paths[0], sub_dir_a_);
+ EXPECT_EQ(paths[1], sub_dir_b_);
+}
+
+TEST_F(ParseWhiteListedPathsTest, RelativePathReturnsError) {
+ InlinedVector paths;
+ PathString relative_path = ORT_TSTR("relative_dir");
+ ASSERT_STATUS_NOT_OK_AND_HAS_SUBSTR(
+ utils::ParseWhiteListedPaths(relative_path, paths),
+ "not absolute");
+}
+
+TEST_F(ParseWhiteListedPathsTest, NonExistentPathReturnsError) {
+ auto non_existent = test_dir_ / "does_not_exist";
+ InlinedVector paths;
+ ASSERT_STATUS_NOT_OK_AND_HAS_SUBSTR(
+ utils::ParseWhiteListedPaths(ToOrtPath(non_existent), paths),
+ "does not exist");
+}
+
+TEST_F(ParseWhiteListedPathsTest, FileNotDirectoryReturnsError) {
+ InlinedVector paths;
+ ASSERT_STATUS_NOT_OK_AND_HAS_SUBSTR(
+ utils::ParseWhiteListedPaths(ToOrtPath(regular_file_), paths),
+ "not a directory");
+}
+
+TEST_F(ParseWhiteListedPathsTest, EmptySegmentBetweenSeparatorsIsSkipped) {
+ // "dir_a;;dir_b" has an empty segment between the two semicolons
+ PathString combined = ToOrtPath(sub_dir_a_) + ORT_TSTR(';') + ORT_TSTR(';') + ToOrtPath(sub_dir_b_);
+ InlinedVector paths;
+ ASSERT_STATUS_OK(utils::ParseWhiteListedPaths(combined, paths));
+ ASSERT_EQ(paths.size(), 2u);
+ EXPECT_EQ(paths[0], sub_dir_a_);
+ EXPECT_EQ(paths[1], sub_dir_b_);
+}
+
+TEST_F(ParseWhiteListedPathsTest, TrailingSeparatorProducesEmptySegment) {
+ PathString with_trailing = ToOrtPath(sub_dir_a_) + ORT_TSTR(';');
+ InlinedVector paths;
+ ASSERT_STATUS_OK(utils::ParseWhiteListedPaths(with_trailing, paths));
+ ASSERT_EQ(paths.size(), 1u);
+ EXPECT_EQ(paths[0], sub_dir_a_);
+}
+
+TEST_F(ParseWhiteListedPathsTest, LeadingSeparatorProducesEmptySegment) {
+ PathString with_leading = ORT_TSTR(';') + ToOrtPath(sub_dir_a_);
+ InlinedVector paths;
+ ASSERT_STATUS_OK(utils::ParseWhiteListedPaths(with_leading, paths));
+ ASSERT_EQ(paths.size(), 1u);
+ EXPECT_EQ(paths[0], sub_dir_a_);
+}
+
+TEST_F(ParseWhiteListedPathsTest, OutParamIsClearedOnEachCall) {
+ InlinedVector paths;
+ ASSERT_STATUS_OK(utils::ParseWhiteListedPaths(ToOrtPath(sub_dir_a_), paths));
+ ASSERT_EQ(paths.size(), 1u);
+
+ // Call again with empty string; paths should be cleared
+ ASSERT_STATUS_OK(utils::ParseWhiteListedPaths(PathString(), paths));
+ EXPECT_TRUE(paths.empty());
+}
+
+TEST_F(ParseWhiteListedPathsTest, SymlinkDirectoryReturnsError) {
+ auto link_path = test_dir_ / "link_to_dir";
+ try {
+ std::filesystem::create_directory_symlink(sub_dir_a_, link_path);
+ } catch (const std::exception& e) {
+ GTEST_SKIP() << "Skipping symlink test: symlink creation not supported. " << e.what();
+ }
+ InlinedVector paths;
+ ASSERT_STATUS_NOT_OK_AND_HAS_SUBSTR(
+ utils::ParseWhiteListedPaths(ToOrtPath(link_path), paths),
+ "contains a symlink");
+}
+
+TEST_F(ParseWhiteListedPathsTest, SymlinkInIntermediateComponentReturnsError) {
+ // Create: test_dir_/link_to_dir_a -> sub_dir_a_, then use test_dir_/link_to_dir_a/nested as the path.
+ // Even though the final target is a real directory, the path has a symlink component.
+ auto nested_dir = sub_dir_a_ / "nested";
+ std::filesystem::create_directories(nested_dir);
+ auto link_in_path = test_dir_ / "link_to_dir_a";
+ try {
+ std::filesystem::create_directory_symlink(sub_dir_a_, link_in_path);
+ } catch (const std::exception& e) {
+ GTEST_SKIP() << "Skipping symlink test: symlink creation not supported. " << e.what();
+ }
+ auto path_through_link = link_in_path / "nested";
+ InlinedVector paths;
+ ASSERT_STATUS_NOT_OK_AND_HAS_SUBSTR(
+ utils::ParseWhiteListedPaths(ToOrtPath(path_through_link), paths),
+ "contains a symlink");
+}
+
+TEST_F(ParseWhiteListedPathsTest, OnlySeparatorsReturnsEmptyVector) {
+ PathString only_seps = ORT_TSTR(";;;");
+ InlinedVector paths;
+ ASSERT_STATUS_OK(utils::ParseWhiteListedPaths(only_seps, paths));
+ EXPECT_TRUE(paths.empty());
+}
+
+TEST_F(ParseWhiteListedPathsTest, ErrorOnSecondPathDoesNotModifyOutput) {
+ // Pre-populate paths to verify it is not modified on error
+ InlinedVector paths;
+ paths.push_back(std::filesystem::path(ORT_TSTR("/dummy/sentinel")));
+
+ auto non_existent = test_dir_ / "no_such_dir";
+ PathString combined = ToOrtPath(sub_dir_a_) + ORT_TSTR(';') + ToOrtPath(non_existent);
+ ASSERT_STATUS_NOT_OK_AND_HAS_SUBSTR(
+ utils::ParseWhiteListedPaths(combined, paths),
+ "does not exist");
+ // Output container must be unchanged on error
+ ASSERT_EQ(paths.size(), 1u);
+ EXPECT_EQ(paths[0], std::filesystem::path(ORT_TSTR("/dummy/sentinel")));
+}
+
+TEST_F(ParseWhiteListedPathsTest, OutParamUnchangedOnError) {
+ // First call succeeds
+ InlinedVector paths;
+ ASSERT_STATUS_OK(utils::ParseWhiteListedPaths(ToOrtPath(sub_dir_a_), paths));
+ ASSERT_EQ(paths.size(), 1u);
+ EXPECT_EQ(paths[0], sub_dir_a_);
+
+ // Second call fails - paths should retain the previous successful result
+ PathString relative_path = ORT_TSTR("relative_dir");
+ ASSERT_STATUS_NOT_OK_AND_HAS_SUBSTR(
+ utils::ParseWhiteListedPaths(relative_path, paths),
+ "not absolute");
+ ASSERT_EQ(paths.size(), 1u);
+ EXPECT_EQ(paths[0], sub_dir_a_);
+}
+
} // namespace test
} // namespace onnxruntime
diff --git a/onnxruntime/test/ir/graph_test.cc b/onnxruntime/test/ir/graph_test.cc
index 4d80cb704748c..5a5c3569ac7ac 100644
--- a/onnxruntime/test/ir/graph_test.cc
+++ b/onnxruntime/test/ir/graph_test.cc
@@ -2817,7 +2817,7 @@ TEST_F(GraphTest, ShapeInferenceAfterInitializerExternalization) {
<< "We no longer externalize data in the Graph constructor.";
// Now externalize explicitly to trigger the bug scenario
- ASSERT_STATUS_OK(graph.ConvertInitializersIntoOrtValues());
+ ASSERT_STATUS_OK(graph.ConvertInitializersIntoOrtValues({}));
ASSERT_TRUE(graph.GetInitializedTensor("split_sizes", initializer_after));
ASSERT_NE(initializer_after, nullptr);
ASSERT_TRUE(utils::HasExternalDataInMemory(*initializer_after))