From a06caead0cffb1efb1463616344b1950fa14e7c0 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 13 Feb 2026 18:27:51 -0800 Subject: [PATCH 1/5] introduce parsing for whitelisted paths --- .../core/framework/tensorprotoutils.cc | 51 ++++++ onnxruntime/core/framework/tensorprotoutils.h | 11 ++ .../test/framework/tensorutils_test.cc | 164 ++++++++++++++++++ 3 files changed, 226 insertions(+) diff --git a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc index e0b31c29a054b..7cb927fb9a23c 100644 --- a/onnxruntime/core/framework/tensorprotoutils.cc +++ b/onnxruntime/core/framework/tensorprotoutils.cc @@ -328,6 +328,57 @@ Status TensorProtoWithExternalDataToTensorProto( return Status::OK(); } +Status ParseWhiteListedPaths(const PathString& paths_str, + /*out*/ InlinedVector& paths) { + if (paths_str.empty()) { + paths.clear(); + return Status::OK(); + } + + InlinedVector result; + + auto process_path = [&](const PathString& p_str) -> Status { + if (p_str.empty()) return Status::OK(); + std::filesystem::path path(p_str); + std::error_code ec; + // Validate: absolute, exists, not a symlink, and is a directory + if (!path.is_absolute()) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "Whitelisted data path is not absolute: ", path.string()); + } + if (!std::filesystem::exists(path, ec) || ec) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "Whitelisted data path does not exist: ", path.string()); + } + if (std::filesystem::is_symlink(path, ec) || ec) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "Whitelisted data path is a symlink: ", path.string()); + } + if (!std::filesystem::is_directory(path, ec) || ec) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "Whitelisted data path is not a directory: ", path.string()); + } + result.push_back(path); + return Status::OK(); + }; + + constexpr PathChar kSemiColonSep = ORT_TSTR(';'); + + size_t start = 0; + size_t end = paths_str.find(kSemiColonSep); + + while (end != PathString::npos) { + ORT_RETURN_IF_ERROR(process_path(paths_str.substr(start, end - start))); + start = end + 1; + end = paths_str.find(kSemiColonSep, start); + } + ORT_RETURN_IF_ERROR(process_path(paths_str.substr(start))); + + paths = std::move(result); + + return Status::OK(); +} + Status ValidateExternalDataPath(const std::filesystem::path& base_dir, const std::filesystem::path& location) { // Reject absolute paths diff --git a/onnxruntime/core/framework/tensorprotoutils.h b/onnxruntime/core/framework/tensorprotoutils.h index 685fa65a73720..2762098de118d 100644 --- a/onnxruntime/core/framework/tensorprotoutils.h +++ b/onnxruntime/core/framework/tensorprotoutils.h @@ -525,6 +525,17 @@ Status TensorProtoWithExternalDataToTensorProto( const std::filesystem::path& model_path, ONNX_NAMESPACE::TensorProto& new_tensor_proto); +/// +/// This function parses the input string which is expected to be a list of paths separated by ';' +/// and returns a vector of std::filesystem::paths. The function also validates that each path is an absolute path of a +/// folder, it is not a symlink and actually exists on the file system. +/// +/// +/// +/// Status +Status ParseWhiteListedPaths(const PathString& paths_str, + /*out*/ InlinedVector& paths); + /// /// The functions will make sure the 'location' specified in the external data is under the 'base_dir'. /// If the `base_dir` is empty, the function only ensures that `location` is not an absolute path. diff --git a/onnxruntime/test/framework/tensorutils_test.cc b/onnxruntime/test/framework/tensorutils_test.cc index 0d7b583faf27b..ba11f42c3f7b1 100644 --- a/onnxruntime/test/framework/tensorutils_test.cc +++ b/onnxruntime/test/framework/tensorutils_test.cc @@ -586,5 +586,169 @@ TEST_F(PathValidationTest, ValidateExternalDataPathWithSymlinkOutside) { ASSERT_FALSE(utils::ValidateExternalDataPath(base_dir_, "outside_link.bin").IsOK()); } +// Test fixture for ParseWhiteListedPaths tests. +class ParseWhiteListedPathsTest : public ::testing::Test { + protected: + void SetUp() override { + test_dir_ = std::filesystem::temp_directory_path() / "ParseWhiteListedPathsTest"; + sub_dir_a_ = test_dir_ / "dir_a"; + sub_dir_b_ = test_dir_ / "dir_b"; + std::filesystem::create_directories(sub_dir_a_); + std::filesystem::create_directories(sub_dir_b_); + + // Create a regular file (not a directory) + regular_file_ = test_dir_ / "file.txt"; + std::ofstream{regular_file_}; + } + + void TearDown() override { + std::filesystem::remove_all(test_dir_); + } + + PathString ToOrtPath(const std::filesystem::path& p) { +#ifdef _WIN32 + return p.wstring(); +#else + return p.string(); +#endif + } + + std::filesystem::path test_dir_; + std::filesystem::path sub_dir_a_; + std::filesystem::path sub_dir_b_; + std::filesystem::path regular_file_; +}; + +TEST_F(ParseWhiteListedPathsTest, EmptyStringReturnsOkAndEmptyVector) { + InlinedVector paths; + ASSERT_STATUS_OK(utils::ParseWhiteListedPaths(PathString(), paths)); + EXPECT_TRUE(paths.empty()); +} + +TEST_F(ParseWhiteListedPathsTest, SingleValidAbsoluteDirectory) { + InlinedVector paths; + ASSERT_STATUS_OK(utils::ParseWhiteListedPaths(ToOrtPath(sub_dir_a_), paths)); + ASSERT_EQ(paths.size(), 1u); + EXPECT_EQ(paths[0], sub_dir_a_); +} + +TEST_F(ParseWhiteListedPathsTest, MultipleValidAbsoluteDirectories) { + PathString combined = ToOrtPath(sub_dir_a_) + ORT_TSTR(';') + ToOrtPath(sub_dir_b_); + InlinedVector paths; + ASSERT_STATUS_OK(utils::ParseWhiteListedPaths(combined, paths)); + ASSERT_EQ(paths.size(), 2u); + EXPECT_EQ(paths[0], sub_dir_a_); + EXPECT_EQ(paths[1], sub_dir_b_); +} + +TEST_F(ParseWhiteListedPathsTest, RelativePathReturnsError) { + InlinedVector paths; + PathString relative_path = ORT_TSTR("relative_dir"); + ASSERT_STATUS_NOT_OK_AND_HAS_SUBSTR( + utils::ParseWhiteListedPaths(relative_path, paths), + "not absolute"); +} + +TEST_F(ParseWhiteListedPathsTest, NonExistentPathReturnsError) { + auto non_existent = test_dir_ / "does_not_exist"; + InlinedVector paths; + ASSERT_STATUS_NOT_OK_AND_HAS_SUBSTR( + utils::ParseWhiteListedPaths(ToOrtPath(non_existent), paths), + "does not exist"); +} + +TEST_F(ParseWhiteListedPathsTest, FileNotDirectoryReturnsError) { + InlinedVector paths; + ASSERT_STATUS_NOT_OK_AND_HAS_SUBSTR( + utils::ParseWhiteListedPaths(ToOrtPath(regular_file_), paths), + "not a directory"); +} + +TEST_F(ParseWhiteListedPathsTest, EmptySegmentBetweenSeparatorsIsSkipped) { + // "dir_a;;dir_b" has an empty segment between the two semicolons + PathString combined = ToOrtPath(sub_dir_a_) + ORT_TSTR(';') + ORT_TSTR(';') + ToOrtPath(sub_dir_b_); + InlinedVector paths; + ASSERT_STATUS_OK(utils::ParseWhiteListedPaths(combined, paths)); + ASSERT_EQ(paths.size(), 2u); + EXPECT_EQ(paths[0], sub_dir_a_); + EXPECT_EQ(paths[1], sub_dir_b_); +} + +TEST_F(ParseWhiteListedPathsTest, TrailingSeparatorProducesEmptySegment) { + PathString with_trailing = ToOrtPath(sub_dir_a_) + ORT_TSTR(';'); + InlinedVector paths; + ASSERT_STATUS_OK(utils::ParseWhiteListedPaths(with_trailing, paths)); + ASSERT_EQ(paths.size(), 1u); + EXPECT_EQ(paths[0], sub_dir_a_); +} + +TEST_F(ParseWhiteListedPathsTest, LeadingSeparatorProducesEmptySegment) { + PathString with_leading = ORT_TSTR(';') + ToOrtPath(sub_dir_a_); + InlinedVector paths; + ASSERT_STATUS_OK(utils::ParseWhiteListedPaths(with_leading, paths)); + ASSERT_EQ(paths.size(), 1u); + EXPECT_EQ(paths[0], sub_dir_a_); +} + +TEST_F(ParseWhiteListedPathsTest, OutParamIsClearedOnEachCall) { + InlinedVector paths; + ASSERT_STATUS_OK(utils::ParseWhiteListedPaths(ToOrtPath(sub_dir_a_), paths)); + ASSERT_EQ(paths.size(), 1u); + + // Call again with empty string; paths should be cleared + ASSERT_STATUS_OK(utils::ParseWhiteListedPaths(PathString(), paths)); + EXPECT_TRUE(paths.empty()); +} + +TEST_F(ParseWhiteListedPathsTest, SymlinkDirectoryReturnsError) { + auto link_path = test_dir_ / "link_to_dir"; + try { + std::filesystem::create_directory_symlink(sub_dir_a_, link_path); + } catch (const std::exception& e) { + GTEST_SKIP() << "Skipping symlink test: symlink creation not supported. " << e.what(); + } + InlinedVector paths; + ASSERT_STATUS_NOT_OK_AND_HAS_SUBSTR( + utils::ParseWhiteListedPaths(ToOrtPath(link_path), paths), + "symlink"); +} + +TEST_F(ParseWhiteListedPathsTest, OnlySeparatorsReturnsEmptyVector) { + PathString only_seps = ORT_TSTR(";;;"); + InlinedVector paths; + ASSERT_STATUS_OK(utils::ParseWhiteListedPaths(only_seps, paths)); + EXPECT_TRUE(paths.empty()); +} + +TEST_F(ParseWhiteListedPathsTest, ErrorOnSecondPathDoesNotModifyOutput) { + // Pre-populate paths to verify it is not modified on error + InlinedVector paths; + paths.push_back(std::filesystem::path(ORT_TSTR("/dummy/sentinel"))); + + auto non_existent = test_dir_ / "no_such_dir"; + PathString combined = ToOrtPath(sub_dir_a_) + ORT_TSTR(';') + ToOrtPath(non_existent); + ASSERT_STATUS_NOT_OK_AND_HAS_SUBSTR( + utils::ParseWhiteListedPaths(combined, paths), + "does not exist"); + // Output container must be unchanged on error + ASSERT_EQ(paths.size(), 1u); + EXPECT_EQ(paths[0], std::filesystem::path(ORT_TSTR("/dummy/sentinel"))); +} + +TEST_F(ParseWhiteListedPathsTest, OutParamUnchangedOnError) { + // First call succeeds + InlinedVector paths; + ASSERT_STATUS_OK(utils::ParseWhiteListedPaths(ToOrtPath(sub_dir_a_), paths)); + ASSERT_EQ(paths.size(), 1u); + EXPECT_EQ(paths[0], sub_dir_a_); + + // Second call fails - paths should retain the previous successful result + PathString relative_path = ORT_TSTR("relative_dir"); + ASSERT_STATUS_NOT_OK_AND_HAS_SUBSTR( + utils::ParseWhiteListedPaths(relative_path, paths), + "not absolute"); + ASSERT_EQ(paths.size(), 1u); + EXPECT_EQ(paths[0], sub_dir_a_); +} } // namespace test } // namespace onnxruntime From 0853aa89195347b197817b9d0e25f9befa52a8ad Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 13 Feb 2026 19:06:39 -0800 Subject: [PATCH 2/5] Add SessionOptionsSetWhiteListedDataFolders public API --- .../core/session/onnxruntime_c_api.h | 17 +++++++++++++++++ .../core/session/onnxruntime_cxx_api.h | 2 ++ .../core/session/onnxruntime_cxx_inline.h | 6 ++++++ onnxruntime/core/framework/session_options.h | 4 ++++ onnxruntime/core/session/abi_session_options.cc | 11 +++++++++++ onnxruntime/core/session/onnxruntime_c_api.cc | 3 ++- onnxruntime/core/session/ort_apis.h | 3 ++- 7 files changed, 44 insertions(+), 2 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index 77c2ff795e800..62ab37c1bfb40 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -7221,6 +7221,23 @@ struct OrtApi { _Outptr_result_maybenull_ const int64_t** shape_data, _Out_ size_t* shape_data_count); + /** \brief Set whitelisted data folders for external data loading. + * + * Sets a semicolon-separated list of absolute directory paths that are allowed as sources + * for external data. Each path must be an absolute path to an existing directory and must not + * be a symbolic link. + * + * \param[in] options Session options instance. + * \param[in] whitelisted_data_folders Semicolon-separated list of absolute directory paths, or + * nullptr/empty string to clear the whitelist. + * + * \return nullptr on success, or an OrtStatus on failure. + * + * \since Version 1.24. + */ + ORT_API2_STATUS(SessionOptionsSetWhiteListedDataFolders, _Inout_ OrtSessionOptions* options, + _In_ const ORTCHAR_T* whitelisted_data_folders); + /** \brief Enable profiling for this run * * \param[in] options diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index 2c1d52894e7f3..8eb6b6ff8326f 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -1558,6 +1558,8 @@ struct SessionOptionsImpl : ConstSessionOptionsImpl { ///< Wraps OrtApi::AddFreeDimensionOverrideByName SessionOptionsImpl& AddFreeDimensionOverrideByName(const char* dim_name, int64_t dim_value); + ///< Wraps OrtApi::SessionOptionsSetWhiteListedDataFolders + SessionOptionsImpl& SetWhiteListedDataFolders(const ORTCHAR_T* whitelisted_data_folders); }; } // namespace detail diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h index 745128fe6c7b4..d0bfb33d78dd7 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h @@ -1304,6 +1304,12 @@ inline SessionOptionsImpl& SessionOptionsImpl::SetLoadCancellationFlag(boo return *this; } +template +inline SessionOptionsImpl& SessionOptionsImpl::SetWhiteListedDataFolders(const ORTCHAR_T* whitelisted_data_folders) { + ThrowOnError(GetApi().SessionOptionsSetWhiteListedDataFolders(this->p_, whitelisted_data_folders)); + return *this; +} + template inline SessionOptionsImpl& SessionOptionsImpl::SetLogId(const char* logid) { ThrowOnError(GetApi().SetSessionLogId(this->p_, logid)); diff --git a/onnxruntime/core/framework/session_options.h b/onnxruntime/core/framework/session_options.h index b328fc916f885..cd5450c6fe862 100644 --- a/onnxruntime/core/framework/session_options.h +++ b/onnxruntime/core/framework/session_options.h @@ -226,6 +226,10 @@ struct SessionOptions { bool has_explicit_ep_context_gen_options = false; epctx::ModelGenOptions ep_context_gen_options = {}; epctx::ModelGenOptions GetEpContextGenerationOptions() const; + + // Semicolon-separated list of whitelisted data folder paths. + // Used to restrict where external data can be loaded from. + PathString whitelisted_data_folders; }; inline std::ostream& operator<<(std::ostream& os, const SessionOptions& session_options) { diff --git a/onnxruntime/core/session/abi_session_options.cc b/onnxruntime/core/session/abi_session_options.cc index 3df6d37d63794..7c1d4b558bf9c 100644 --- a/onnxruntime/core/session/abi_session_options.cc +++ b/onnxruntime/core/session/abi_session_options.cc @@ -413,3 +413,14 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsSetLoadCancellationFlag, _Inout_ OrtS return nullptr; API_IMPL_END } + +ORT_API_STATUS_IMPL(OrtApis::SessionOptionsSetWhiteListedDataFolders, _Inout_ OrtSessionOptions* options, + _In_ const ORTCHAR_T* whitelisted_data_folders) { + API_IMPL_BEGIN + if (whitelisted_data_folders == nullptr) { + return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "Input whitelisted_data_folders is nullptr"); + } + options->value.whitelisted_data_folders = whitelisted_data_folders; + return nullptr; + API_IMPL_END +} diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index 7a027c8eafb81..014039d314fdd 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -4803,6 +4803,7 @@ static constexpr OrtApi ort_api_1_to_25 = { &OrtApis::EpAssignedNode_GetOperatorType, &OrtApis::RunOptionsSetSyncStream, &OrtApis::GetTensorElementTypeAndShapeDataReference, + &OrtApis::SessionOptionsSetWhiteListedDataFolders, // End of Version 24 - DO NOT MODIFY ABOVE (see above text for more information) &OrtApis::RunOptionsEnableProfiling, @@ -4843,7 +4844,7 @@ static_assert(offsetof(OrtApi, SetEpDynamicOptions) / sizeof(void*) == 284, "Siz static_assert(offsetof(OrtApi, GetEpApi) / sizeof(void*) == 317, "Size of version 22 API cannot change"); static_assert(offsetof(OrtApi, CreateExternalInitializerInfo) / sizeof(void*) == 389, "Size of version 23 API cannot change"); -static_assert(offsetof(OrtApi, GetTensorElementTypeAndShapeDataReference) / sizeof(void*) == 414, "Size of version 24 API cannot change"); +static_assert(offsetof(OrtApi, SessionOptionsSetWhiteListedDataFolders) / sizeof(void*) == 415, "Size of version 24 API cannot change"); // So that nobody forgets to finish an API version, this check will serve as a reminder: static_assert(std::string_view(ORT_VERSION) == "1.25.0", diff --git a/onnxruntime/core/session/ort_apis.h b/onnxruntime/core/session/ort_apis.h index 3d990909cfb41..8edd396d8b7ca 100644 --- a/onnxruntime/core/session/ort_apis.h +++ b/onnxruntime/core/session/ort_apis.h @@ -78,7 +78,8 @@ ORT_API_STATUS_IMPL(CreateCustomOpDomain, _In_ const char* domain, _Outptr_ OrtC ORT_API_STATUS_IMPL(CustomOpDomain_Add, _Inout_ OrtCustomOpDomain* custom_op_domain, _In_ const OrtCustomOp* op); ORT_API_STATUS_IMPL(AddCustomOpDomain, _Inout_ OrtSessionOptions* options, _In_ OrtCustomOpDomain* custom_op_domain); ORT_API_STATUS_IMPL(RegisterCustomOpsLibrary, _Inout_ OrtSessionOptions* options, _In_ const char* library_path, _Outptr_ void** library_handle); - +ORT_API_STATUS_IMPL(SessionOptionsSetWhiteListedDataFolders, _Inout_ OrtSessionOptions* options, + _In_ const ORTCHAR_T* whitelisted_data_folders); ORT_API_STATUS_IMPL(SessionGetInputCount, _In_ const OrtSession* sess, _Out_ size_t* out); ORT_API_STATUS_IMPL(SessionGetOutputCount, _In_ const OrtSession* sess, _Out_ size_t* out); ORT_API_STATUS_IMPL(SessionGetOverridableInitializerCount, _In_ const OrtSession* sess, _Out_ size_t* out); From 4f70b56f8824ca2628effc17c174afd0ca8864a7 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Sat, 14 Feb 2026 15:32:05 -0800 Subject: [PATCH 3/5] Properly detect symlinks --- .../core/framework/tensorprotoutils.cc | 41 ++++++++++++++----- .../test/framework/tensorutils_test.cc | 25 ++++++++++- 2 files changed, 55 insertions(+), 11 deletions(-) diff --git a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc index 7cb927fb9a23c..5125aaf4defdd 100644 --- a/onnxruntime/core/framework/tensorprotoutils.cc +++ b/onnxruntime/core/framework/tensorprotoutils.cc @@ -341,24 +341,46 @@ Status ParseWhiteListedPaths(const PathString& paths_str, if (p_str.empty()) return Status::OK(); std::filesystem::path path(p_str); std::error_code ec; - // Validate: absolute, exists, not a symlink, and is a directory if (!path.is_absolute()) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Whitelisted data path is not absolute: ", path.string()); } - if (!std::filesystem::exists(path, ec) || ec) { + // canonical() resolves all symlinks and requires the path to exist. + // If it fails, the path either doesn't exist or can't be resolved. + auto canonical_path = std::filesystem::canonical(path, ec); + if (ec) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, - "Whitelisted data path does not exist: ", path.string()); + "Whitelisted data path does not exist or cannot be resolved: ", path.string()); } - if (std::filesystem::is_symlink(path, ec) || ec) { - return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, - "Whitelisted data path is a symlink: ", path.string()); + // Walk each component of the canonical path and check for symlinks. + // We choose with approach because both canonical() and weakly_canonical() on Windows + // (MSVC's implementation) resolve symlinks for existing path components + // using the same underlying Win32 API (GetFinalPathNameByHandle). + // So comparing them always produces an equal result, making symlink detection impossible via comparison. + // We check the canonical path (not the original) so that normalization differences + // (trailing slashes, "..", ".") don't interfere, while still detecting symlinks + // that may exist along the resolved path. + { + auto normalized = path.lexically_normal(); + std::filesystem::path accumulated; + for (const auto& component : normalized) { + accumulated /= component; + // Skip checking the root (e.g. "C:\" or "/") since is_symlink would fail or be meaningless. + if (accumulated == normalized.root_path()) { + continue; + } + if (std::filesystem::is_symlink(accumulated, ec)) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "Whitelisted data path contains a symlink: ", path.string()); + } + } } - if (!std::filesystem::is_directory(path, ec) || ec) { + + if (!std::filesystem::is_directory(canonical_path, ec) || ec) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Whitelisted data path is not a directory: ", path.string()); } - result.push_back(path); + result.push_back(canonical_path); return Status::OK(); }; @@ -375,7 +397,6 @@ Status ParseWhiteListedPaths(const PathString& paths_str, ORT_RETURN_IF_ERROR(process_path(paths_str.substr(start))); paths = std::move(result); - return Status::OK(); } @@ -388,7 +409,7 @@ Status ValidateExternalDataPath(const std::filesystem::path& base_dir, // Resolve and verify the path stays within model directory auto base_canonical = std::filesystem::weakly_canonical(base_dir); // If the symlink exists, it resolves to the target path; - // so if the symllink is outside the directory it would be caught here. + // so if the symlink is outside the directory it would be caught here. auto resolved = std::filesystem::weakly_canonical(base_dir / location); // Check that resolved path starts with base directory auto [base_end, resolved_it] = std::mismatch( diff --git a/onnxruntime/test/framework/tensorutils_test.cc b/onnxruntime/test/framework/tensorutils_test.cc index ba11f42c3f7b1..cd77e7331b190 100644 --- a/onnxruntime/test/framework/tensorutils_test.cc +++ b/onnxruntime/test/framework/tensorutils_test.cc @@ -596,6 +596,11 @@ class ParseWhiteListedPathsTest : public ::testing::Test { std::filesystem::create_directories(sub_dir_a_); std::filesystem::create_directories(sub_dir_b_); + // Canonicalize the paths so that tests can compare against what ParseWhiteListedPaths stores. + test_dir_ = std::filesystem::canonical(test_dir_); + sub_dir_a_ = std::filesystem::canonical(sub_dir_a_); + sub_dir_b_ = std::filesystem::canonical(sub_dir_b_); + // Create a regular file (not a directory) regular_file_ = test_dir_ / "file.txt"; std::ofstream{regular_file_}; @@ -710,7 +715,25 @@ TEST_F(ParseWhiteListedPathsTest, SymlinkDirectoryReturnsError) { InlinedVector paths; ASSERT_STATUS_NOT_OK_AND_HAS_SUBSTR( utils::ParseWhiteListedPaths(ToOrtPath(link_path), paths), - "symlink"); + "contains a symlink"); +} + +TEST_F(ParseWhiteListedPathsTest, SymlinkInIntermediateComponentReturnsError) { + // Create: test_dir_/link_to_dir_a -> sub_dir_a_, then use test_dir_/link_to_dir_a/nested as the path. + // Even though the final target is a real directory, the path has a symlink component. + auto nested_dir = sub_dir_a_ / "nested"; + std::filesystem::create_directories(nested_dir); + auto link_in_path = test_dir_ / "link_to_dir_a"; + try { + std::filesystem::create_directory_symlink(sub_dir_a_, link_in_path); + } catch (const std::exception& e) { + GTEST_SKIP() << "Skipping symlink test: symlink creation not supported. " << e.what(); + } + auto path_through_link = link_in_path / "nested"; + InlinedVector paths; + ASSERT_STATUS_NOT_OK_AND_HAS_SUBSTR( + utils::ParseWhiteListedPaths(ToOrtPath(path_through_link), paths), + "contains a symlink"); } TEST_F(ParseWhiteListedPathsTest, OnlySeparatorsReturnsEmptyVector) { From 0f9147ca07bd0c510ba91813db6349568b1c55a4 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Sat, 14 Feb 2026 16:21:03 -0800 Subject: [PATCH 4/5] Test validate path --- include/onnxruntime/core/graph/graph.h | 5 +- .../core/framework/tensorprotoutils.cc | 46 +++++++--- onnxruntime/core/framework/tensorprotoutils.h | 7 +- onnxruntime/core/graph/graph.cc | 4 +- .../providers/shared_library/provider_api.h | 5 -- .../shared_library/provider_interfaces.h | 3 - onnxruntime/core/session/inference_session.cc | 5 +- .../core/session/provider_bridge_ort.cc | 5 -- .../test/framework/tensorutils_test.cc | 84 +++++++++++++++++++ onnxruntime/test/ir/graph_test.cc | 2 +- 10 files changed, 135 insertions(+), 31 deletions(-) diff --git a/include/onnxruntime/core/graph/graph.h b/include/onnxruntime/core/graph/graph.h index 58473a79ddaa6..6333be312935c 100644 --- a/include/onnxruntime/core/graph/graph.h +++ b/include/onnxruntime/core/graph/graph.h @@ -1463,10 +1463,11 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi /// /// This function converts all the graph TensorProto initializers into OrtValues - /// and creates a in-memory external data reference for each OrtValue. + /// and creates a in-memory external data reference for each OrtValue. It validates external paths data references. /// + /// /// - Status ConvertInitializersIntoOrtValues(); + Status ConvertInitializersIntoOrtValues(gsl::span whitelisted_external_paths); /** * @brief This function examines the specified initializers in the graph and converts them inline diff --git a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc index 5125aaf4defdd..08a27a1541db7 100644 --- a/onnxruntime/core/framework/tensorprotoutils.cc +++ b/onnxruntime/core/framework/tensorprotoutils.cc @@ -401,23 +401,49 @@ Status ParseWhiteListedPaths(const PathString& paths_str, } Status ValidateExternalDataPath(const std::filesystem::path& base_dir, - const std::filesystem::path& location) { + const std::filesystem::path& location, + gsl::span whitelisted_external_folders) { // Reject absolute paths ORT_RETURN_IF(location.is_absolute(), "Absolute paths not allowed for external data location"); - if (!base_dir.empty()) { - // Resolve and verify the path stays within model directory - auto base_canonical = std::filesystem::weakly_canonical(base_dir); - // If the symlink exists, it resolves to the target path; - // so if the symlink is outside the directory it would be caught here. - auto resolved = std::filesystem::weakly_canonical(base_dir / location); - // Check that resolved path starts with base directory + + auto validate_location_under_dir = [&location](const std::filesystem::path& dir) -> bool { + if (dir.empty()) { + return false; + } + auto base_canonical = std::filesystem::weakly_canonical(dir); + auto resolved = std::filesystem::weakly_canonical(dir / location); auto [base_end, resolved_it] = std::mismatch( base_canonical.begin(), base_canonical.end(), resolved.begin(), resolved.end()); - ORT_RETURN_IF(base_end != base_canonical.end(), - "External data path: ", location, " escapes model directory: ", base_dir); + return base_end == base_canonical.end(); + }; + + if (!base_dir.empty()) { + if (validate_location_under_dir(base_dir)) { + return Status::OK(); + } } + + // base_dir validation failed or base_dir is empty, try whitelisted folders + if (!whitelisted_external_folders.empty()) { + for (const auto& folder : whitelisted_external_folders) { + if (validate_location_under_dir(folder)) { + return Status::OK(); + } + } + + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "External data path: ", location, + " is not under any allowed directory"); + } + + // No whitelisted folders supplied + if (!base_dir.empty()) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "External data path: ", location, " escapes model directory: ", base_dir); + } + return Status::OK(); } diff --git a/onnxruntime/core/framework/tensorprotoutils.h b/onnxruntime/core/framework/tensorprotoutils.h index 2762098de118d..7908b08929a5a 100644 --- a/onnxruntime/core/framework/tensorprotoutils.h +++ b/onnxruntime/core/framework/tensorprotoutils.h @@ -539,14 +539,17 @@ Status ParseWhiteListedPaths(const PathString& paths_str, /// /// The functions will make sure the 'location' specified in the external data is under the 'base_dir'. /// If the `base_dir` is empty, the function only ensures that `location` is not an absolute path. +/// If validation fails for base_dir, the function will check against whitelisted_external_folders. /// /// model location directory /// location is a string retrieved from TensorProto external data that is not /// an in-memory tag +/// additional folders where external data is allowed /// The function will fail if the resolved full path is not under the model directory -/// or one of the subdirectories +/// or one of the whitelisted folders Status ValidateExternalDataPath(const std::filesystem::path& base_dir, - const std::filesystem::path& location); + const std::filesystem::path& location, + gsl::span whitelisted_external_folders = {}); #endif // !defined(SHARED_PROVIDER) diff --git a/onnxruntime/core/graph/graph.cc b/onnxruntime/core/graph/graph.cc index 779ca5d180518..47b37ac80f47c 100644 --- a/onnxruntime/core/graph/graph.cc +++ b/onnxruntime/core/graph/graph.cc @@ -3737,7 +3737,7 @@ Status Graph::Resolve(const ResolveOptions& options) { return ForThisAndAllSubgraphs(all_subgraphs, finalize_func); } -Status Graph::ConvertInitializersIntoOrtValues() { +Status Graph::ConvertInitializersIntoOrtValues(gsl::span whitelisted_external_paths) { std::vector all_subgraphs; FindAllSubgraphs(all_subgraphs); @@ -3771,7 +3771,7 @@ Status Graph::ConvertInitializersIntoOrtValues() { std::unique_ptr external_data_info; ORT_RETURN_IF_ERROR(onnxruntime::ExternalDataInfo::Create(tensor_proto.external_data(), external_data_info)); const auto& location = external_data_info->GetRelPath(); - auto st = utils::ValidateExternalDataPath(model_dir, location); + auto st = utils::ValidateExternalDataPath(model_dir, location, whitelisted_external_paths); if (!st.IsOK()) { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "External data path validation failed for initializer: ", tensor_proto.name(), diff --git a/onnxruntime/core/providers/shared_library/provider_api.h b/onnxruntime/core/providers/shared_library/provider_api.h index f5421d8540db8..1ed78c89e722d 100644 --- a/onnxruntime/core/providers/shared_library/provider_api.h +++ b/onnxruntime/core/providers/shared_library/provider_api.h @@ -453,11 +453,6 @@ inline bool HasExternalDataInMemory(const ONNX_NAMESPACE::TensorProto& ten_proto return g_host->Utils__HasExternalDataInMemory(ten_proto); } -inline Status ValidateExternalDataPath(const std::filesystem::path& base_dir, - const std::filesystem::path& location) { - return g_host->Utils__ValidateExternalDataPath(base_dir, location); -} - } // namespace utils namespace graph_utils { diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h b/onnxruntime/core/providers/shared_library/provider_interfaces.h index aeaf05cf14591..9cbbc6234a99b 100644 --- a/onnxruntime/core/providers/shared_library/provider_interfaces.h +++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h @@ -1004,9 +1004,6 @@ struct ProviderHost { virtual bool Utils__HasExternalDataInMemory(const ONNX_NAMESPACE::TensorProto& ten_proto) = 0; - virtual Status Utils__ValidateExternalDataPath(const std::filesystem::path& base_path, - const std::filesystem::path& location) = 0; - // Model virtual std::unique_ptr Model__construct(ONNX_NAMESPACE::ModelProto&& model_proto, const PathString& model_path, const IOnnxRuntimeOpSchemaRegistryList* local_registries, diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index c00d63d0be8a2..e14942b8f5b39 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -1390,7 +1390,10 @@ common::Status InferenceSession::TransformGraph(onnxruntime::Graph& graph, bool // auto tensor_proto_to_add = utils::TensorToTensorProto(ort_value.Get(), tensor_proto.name(), // use_tensor_buffer_true); // ORT_RETURN_IF_ERROR(graph.ReplaceInitializedTensor(tensor_proto_to_add, ort_value)); - ORT_RETURN_IF_ERROR_SESSIONID_(graph.ConvertInitializersIntoOrtValues()); + InlinedVector whitelisted_external_data_folders; + ORT_RETURN_IF_ERROR_SESSIONID_(utils::ParseWhiteListedPaths(session_options_.whitelisted_data_folders, + whitelisted_external_data_folders)); + ORT_RETURN_IF_ERROR_SESSIONID_(graph.ConvertInitializersIntoOrtValues(whitelisted_external_data_folders)); auto apply_transformer_once = [](const GraphTransformer& transformer, const logging::Logger& logger, Graph& graph, bool* is_graph_modified = nullptr) -> onnxruntime::common::Status { diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 6949ed0059add..e5bbd656bc325 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1295,11 +1295,6 @@ struct ProviderHostImpl : ProviderHost { return onnxruntime::utils::HasExternalDataInMemory(ten_proto); } - Status Utils__ValidateExternalDataPath(const std::filesystem::path& base_path, - const std::filesystem::path& location) override { - return onnxruntime::utils::ValidateExternalDataPath(base_path, location); - } - // Model (wrapped) std::unique_ptr Model__construct(ONNX_NAMESPACE::ModelProto&& model_proto, const PathString& model_path, const IOnnxRuntimeOpSchemaRegistryList* local_registries, diff --git a/onnxruntime/test/framework/tensorutils_test.cc b/onnxruntime/test/framework/tensorutils_test.cc index cd77e7331b190..4191d7a9a219b 100644 --- a/onnxruntime/test/framework/tensorutils_test.cc +++ b/onnxruntime/test/framework/tensorutils_test.cc @@ -511,18 +511,22 @@ class PathValidationTest : public ::testing::Test { // Create a temporary directory for the tests. base_dir_ = std::filesystem::temp_directory_path() / "PathValidationTest"; outside_dir_ = std::filesystem::temp_directory_path() / "outside"; + whitelisted_dir_ = std::filesystem::temp_directory_path() / "whitelisted"; std::filesystem::create_directories(base_dir_); std::filesystem::create_directories(outside_dir_); + std::filesystem::create_directories(whitelisted_dir_); } void TearDown() override { // Clean up the temporary directory. std::filesystem::remove_all(base_dir_); std::filesystem::remove_all(outside_dir_); + std::filesystem::remove_all(whitelisted_dir_); } std::filesystem::path base_dir_; std::filesystem::path outside_dir_; + std::filesystem::path whitelisted_dir_; }; // Test cases for ValidateExternalDataPath. @@ -586,6 +590,85 @@ TEST_F(PathValidationTest, ValidateExternalDataPathWithSymlinkOutside) { ASSERT_FALSE(utils::ValidateExternalDataPath(base_dir_, "outside_link.bin").IsOK()); } +TEST_F(PathValidationTest, ValidateExternalDataPathWithWhitelistedFolder) { + // Path is valid under the whitelisted folder directly. + std::vector whitelist = {outside_dir_}; + ASSERT_STATUS_OK(utils::ValidateExternalDataPath(outside_dir_, "data.bin", whitelist)); +} + +TEST_F(PathValidationTest, ValidateExternalDataPathEscapesBaseButMatchesWhitelist) { + std::vector whitelist = {whitelisted_dir_}; + // "data.bin" is valid under base_dir_, no need for whitelist + ASSERT_STATUS_OK(utils::ValidateExternalDataPath(base_dir_, "data.bin", whitelist)); + + // Create a subdirectory of whitelisted_dir_ and use that as the whitelisted folder. + auto whitelisted_sub = whitelisted_dir_ / "sub"; + std::filesystem::create_directories(whitelisted_sub); + std::vector whitelist2 = {whitelisted_sub}; + + // "../data.bin" escapes base_dir_ and also escapes whitelisted_sub + ASSERT_FALSE(utils::ValidateExternalDataPath(base_dir_, "../data.bin").IsOK()); + ASSERT_FALSE(utils::ValidateExternalDataPath(base_dir_, "../data.bin", whitelist2).IsOK()); +} + +TEST_F(PathValidationTest, ValidateExternalDataPathWhitelistSavesEscapingPath) { + // Location "../outside/data.bin" escapes base_dir_ but resolves under outside_dir_. + auto relative_to_outside = std::filesystem::path("..") / "outside" / "data.bin"; + std::vector whitelist = {outside_dir_}; + + // Without whitelist, it should fail. + ASSERT_FALSE(utils::ValidateExternalDataPath(base_dir_, relative_to_outside).IsOK()); + + // With whitelist containing outside_dir_, it should succeed because + // outside_dir_ / "../outside/data.bin" resolves under outside_dir_. + ASSERT_STATUS_OK(utils::ValidateExternalDataPath(base_dir_, relative_to_outside, whitelist)); +} + +TEST_F(PathValidationTest, ValidateExternalDataPathWhitelistDoesNotMatchEither) { + // Location escapes both base_dir and all whitelisted folders. + auto unrelated_dir = std::filesystem::temp_directory_path() / "unrelated_PathValidationTest"; + std::filesystem::create_directories(unrelated_dir); + auto cleanup = [&]() { std::filesystem::remove_all(unrelated_dir); }; + + std::vector whitelist = {whitelisted_dir_}; + auto escaping_location = std::filesystem::path("..") / "unrelated_PathValidationTest" / "data.bin"; + ASSERT_FALSE(utils::ValidateExternalDataPath(base_dir_, escaping_location, whitelist).IsOK()); + + cleanup(); +} + +TEST_F(PathValidationTest, ValidateExternalDataPathEmptyWhitelist) { + // Empty whitelist should behave the same as no whitelist. + std::vector empty_whitelist; + ASSERT_STATUS_OK(utils::ValidateExternalDataPath(base_dir_, "data.bin", empty_whitelist)); + ASSERT_FALSE(utils::ValidateExternalDataPath(base_dir_, "../data.bin", empty_whitelist).IsOK()); +} + +TEST_F(PathValidationTest, ValidateExternalDataPathMultipleWhitelistedFolders) { + // First whitelisted folder doesn't match, second one does. + auto another_dir = std::filesystem::temp_directory_path() / "another_PathValidationTest"; + std::filesystem::create_directories(another_dir); + auto cleanup = [&]() { std::filesystem::remove_all(another_dir); }; + + auto relative_to_outside = std::filesystem::path("..") / "outside" / "data.bin"; + std::vector whitelist = {another_dir, outside_dir_}; + + // Escapes base_dir_ but outside_dir_ (second whitelist entry) should match. + ASSERT_STATUS_OK(utils::ValidateExternalDataPath(base_dir_, relative_to_outside, whitelist)); + + cleanup(); +} + +TEST_F(PathValidationTest, ValidateExternalDataPathAbsoluteLocationRejectsEvenWithWhitelist) { + // Absolute paths are always rejected, regardless of whitelist. + std::vector whitelist = {outside_dir_}; +#ifdef _WIN32 + ASSERT_FALSE(utils::ValidateExternalDataPath(base_dir_, "C:\\data.bin", whitelist).IsOK()); +#else + ASSERT_FALSE(utils::ValidateExternalDataPath(base_dir_, "/data.bin", whitelist).IsOK()); +#endif +} + // Test fixture for ParseWhiteListedPaths tests. class ParseWhiteListedPathsTest : public ::testing::Test { protected: @@ -773,5 +856,6 @@ TEST_F(ParseWhiteListedPathsTest, OutParamUnchangedOnError) { ASSERT_EQ(paths.size(), 1u); EXPECT_EQ(paths[0], sub_dir_a_); } + } // namespace test } // namespace onnxruntime diff --git a/onnxruntime/test/ir/graph_test.cc b/onnxruntime/test/ir/graph_test.cc index 4d80cb704748c..5a5c3569ac7ac 100644 --- a/onnxruntime/test/ir/graph_test.cc +++ b/onnxruntime/test/ir/graph_test.cc @@ -2817,7 +2817,7 @@ TEST_F(GraphTest, ShapeInferenceAfterInitializerExternalization) { << "We no longer externalize data in the Graph constructor."; // Now externalize explicitly to trigger the bug scenario - ASSERT_STATUS_OK(graph.ConvertInitializersIntoOrtValues()); + ASSERT_STATUS_OK(graph.ConvertInitializersIntoOrtValues({})); ASSERT_TRUE(graph.GetInitializedTensor("split_sizes", initializer_after)); ASSERT_NE(initializer_after, nullptr); ASSERT_TRUE(utils::HasExternalDataInMemory(*initializer_after)) From 04c808725f8d0f8d06c78491409cba41ac592217 Mon Sep 17 00:00:00 2001 From: Tianlei Wu Date: Mon, 16 Feb 2026 18:15:22 +0000 Subject: [PATCH 5/5] address copilot feedback --- include/onnxruntime/core/graph/graph.h | 10 ++++++++++ include/onnxruntime/core/session/onnxruntime_c_api.h | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/include/onnxruntime/core/graph/graph.h b/include/onnxruntime/core/graph/graph.h index 6333be312935c..9d006d258c36c 100644 --- a/include/onnxruntime/core/graph/graph.h +++ b/include/onnxruntime/core/graph/graph.h @@ -1469,6 +1469,16 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi /// Status ConvertInitializersIntoOrtValues(gsl::span whitelisted_external_paths); + /// + /// This function converts all the graph TensorProto initializers into OrtValues + /// and creates a in-memory external data reference for each OrtValue. + /// External data paths are restricted to the model directory. + /// + /// + Status ConvertInitializersIntoOrtValues() { + return ConvertInitializersIntoOrtValues(gsl::span()); + } + /** * @brief This function examines the specified initializers in the graph and converts them inline * if any has external data in memory. diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index 62ab37c1bfb40..7a19a06370f27 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -7229,7 +7229,7 @@ struct OrtApi { * * \param[in] options Session options instance. * \param[in] whitelisted_data_folders Semicolon-separated list of absolute directory paths, or - * nullptr/empty string to clear the whitelist. + * an empty string to clear the whitelist. This pointer must not be NULL. * * \return nullptr on success, or an OrtStatus on failure. *