Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions include/onnxruntime/core/graph/graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -1463,10 +1463,11 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi

/// <summary>
/// This function converts all the graph TensorProto initializers into OrtValues
/// and creates a in-memory external data reference for each OrtValue.
/// and creates an in-memory external data reference for each OrtValue. It also validates external data path references.
/// </summary>
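/// <param name="whitelisted_external_paths">Additional folders from which external data may be loaded.</param>
/// <returns>Status indicating success, or the reason the conversion or external data path validation failed.</returns>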
Status ConvertInitializersIntoOrtValues();
Status ConvertInitializersIntoOrtValues(gsl::span<const std::filesystem::path> whitelisted_external_paths);

/**
* @brief This function examines the specified initializers in the graph and converts them inline
Expand Down
17 changes: 17 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -7221,6 +7221,23 @@ struct OrtApi {
_Outptr_result_maybenull_ const int64_t** shape_data,
_Out_ size_t* shape_data_count);

/** \brief Set whitelisted data folders for external data loading.
*
* Sets a semicolon-separated list of absolute directory paths that are allowed as sources
* for external data. Each path must be an absolute path to an existing directory and must not
* be a symbolic link.
*
* \param[in] options Session options instance.
* \param[in] whitelisted_data_folders Semicolon-separated list of absolute directory paths, or
* nullptr/empty string to clear the whitelist.
*
* \return nullptr on success, or an OrtStatus on failure.
*
* \since Version 1.24.
*/
ORT_API2_STATUS(SessionOptionsSetWhiteListedDataFolders, _Inout_ OrtSessionOptions* options,
_In_ const ORTCHAR_T* whitelisted_data_folders);

/** \brief Enable profiling for this run
*
* \param[in] options
Expand Down
2 changes: 2 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_cxx_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -1558,6 +1558,8 @@ struct SessionOptionsImpl : ConstSessionOptionsImpl<T> {

///< Wraps OrtApi::AddFreeDimensionOverrideByName
SessionOptionsImpl& AddFreeDimensionOverrideByName(const char* dim_name, int64_t dim_value);
///< Wraps OrtApi::SessionOptionsSetWhiteListedDataFolders
SessionOptionsImpl& SetWhiteListedDataFolders(const ORTCHAR_T* whitelisted_data_folders);
};
} // namespace detail

Expand Down
6 changes: 6 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_cxx_inline.h
Original file line number Diff line number Diff line change
Expand Up @@ -1304,6 +1304,12 @@ inline SessionOptionsImpl<T>& SessionOptionsImpl<T>::SetLoadCancellationFlag(boo
return *this;
}

/// Wraps OrtApi::SessionOptionsSetWhiteListedDataFolders.
/// Forwards `whitelisted_data_folders` to the C API for this session options instance and
/// hands the returned status to ThrowOnError. Returns *this so calls can be chained.
template <typename T>
inline SessionOptionsImpl<T>& SessionOptionsImpl<T>::SetWhiteListedDataFolders(const ORTCHAR_T* whitelisted_data_folders) {
  auto* status = GetApi().SessionOptionsSetWhiteListedDataFolders(this->p_, whitelisted_data_folders);
  ThrowOnError(status);
  return *this;
}

template <typename T>
inline SessionOptionsImpl<T>& SessionOptionsImpl<T>::SetLogId(const char* logid) {
ThrowOnError(GetApi().SetSessionLogId(this->p_, logid));
Expand Down
4 changes: 4 additions & 0 deletions onnxruntime/core/framework/session_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,10 @@ struct SessionOptions {
bool has_explicit_ep_context_gen_options = false;
epctx::ModelGenOptions ep_context_gen_options = {};
epctx::ModelGenOptions GetEpContextGenerationOptions() const;

// Semicolon-separated list of whitelisted data folder paths.
// Used to restrict where external data can be loaded from.
PathString whitelisted_data_folders;
};

inline std::ostream& operator<<(std::ostream& os, const SessionOptions& session_options) {
Expand Down
118 changes: 108 additions & 10 deletions onnxruntime/core/framework/tensorprotoutils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -328,24 +328,122 @@ Status TensorProtoWithExternalDataToTensorProto(
return Status::OK();
}

/// <summary>
/// Parses a ';'-separated list of whitelisted data folders and validates every entry.
/// Empty segments (e.g. "a;;b" or a trailing ';') are skipped.
/// Each non-empty entry must be an absolute path to an existing directory, and no component of the
/// path as supplied may be a symlink.
/// </summary>
/// <param name="paths_str">';'-separated list of folders. An empty string clears `paths`.</param>
/// <param name="paths">Receives the canonical form of every entry. It is only modified on success.</param>
/// <returns>OK on success, INVALID_ARGUMENT describing the first entry that failed validation.</returns>
Status ParseWhiteListedPaths(const PathString& paths_str,
                             /*out*/ InlinedVector<std::filesystem::path>& paths) {
  if (paths_str.empty()) {
    paths.clear();
    return Status::OK();
  }

  InlinedVector<std::filesystem::path> result;

  // Validates a single list entry and, on success, appends its canonical form to `result`.
  auto process_path = [&result](const PathString& p_str) -> Status {
    if (p_str.empty()) return Status::OK();

    const std::filesystem::path path(p_str);
    if (!path.is_absolute()) {
      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
                             "Whitelisted data path is not absolute: ", path.string());
    }

    // canonical() resolves all symlinks and requires the path to exist.
    // If it fails, the path either doesn't exist or can't be resolved.
    std::error_code ec;
    auto canonical_path = std::filesystem::canonical(path, ec);
    if (ec) {
      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
                             "Whitelisted data path does not exist or cannot be resolved: ", path.string());
    }

    // Walk every prefix of the path *as supplied* and reject it if any prefix is a symlink.
    // Comparing canonical() with weakly_canonical() cannot detect symlinks because both resolve them.
    // The path is deliberately NOT lexically normalized before the walk: lexical normalization
    // collapses "link/.." textually, which would drop a symlink component from the walk even though
    // canonical() above followed that symlink. "." and ".." components are harmless here because each
    // prefix is inspected by the file system, not interpreted textually.
    std::filesystem::path accumulated = path.root_path();
    for (const auto& component : path.relative_path()) {
      if (component.empty()) {
        // A trailing separator yields an empty final element; the preceding prefix was already checked.
        continue;
      }
      accumulated /= component;
      const auto link_status = std::filesystem::symlink_status(accumulated, ec);
      if (ec) {
        // Fail closed: a prefix that cannot be inspected cannot be proven symlink-free.
        return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
                               "Whitelisted data path does not exist or cannot be resolved: ", path.string());
      }
      if (std::filesystem::is_symlink(link_status)) {
        return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
                               "Whitelisted data path contains a symlink: ", path.string());
      }
    }

    if (!std::filesystem::is_directory(canonical_path, ec) || ec) {
      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
                             "Whitelisted data path is not a directory: ", path.string());
    }

    result.push_back(std::move(canonical_path));
    return Status::OK();
  };

  constexpr PathChar kSemiColonSep = ORT_TSTR(';');

  size_t start = 0;
  size_t end = paths_str.find(kSemiColonSep);

  while (end != PathString::npos) {
    ORT_RETURN_IF_ERROR(process_path(paths_str.substr(start, end - start)));
    start = end + 1;
    end = paths_str.find(kSemiColonSep, start);
  }
  // Remainder after the last separator (or the whole string when there is no separator).
  ORT_RETURN_IF_ERROR(process_path(paths_str.substr(start)));

  // Publish only after every entry validated, so `paths` is untouched on failure.
  paths = std::move(result);
  return Status::OK();
}

Status ValidateExternalDataPath(const std::filesystem::path& base_dir,
const std::filesystem::path& location) {
const std::filesystem::path& location,
gsl::span<const std::filesystem::path> whitelisted_external_folders) {
// Reject absolute paths
ORT_RETURN_IF(location.is_absolute(),
"Absolute paths not allowed for external data location");
if (!base_dir.empty()) {
// Resolve and verify the path stays within model directory
auto base_canonical = std::filesystem::weakly_canonical(base_dir);
// If the location is a symlink that exists, weakly_canonical resolves it to the target path,
// so a symlink that points outside the directory is caught here.
auto resolved = std::filesystem::weakly_canonical(base_dir / location);
// Check that resolved path starts with base directory

auto validate_location_under_dir = [&location](const std::filesystem::path& dir) -> bool {
if (dir.empty()) {
return false;
}
auto base_canonical = std::filesystem::weakly_canonical(dir);
auto resolved = std::filesystem::weakly_canonical(dir / location);
auto [base_end, resolved_it] = std::mismatch(
base_canonical.begin(), base_canonical.end(),
resolved.begin(), resolved.end());
ORT_RETURN_IF(base_end != base_canonical.end(),
"External data path: ", location, " escapes model directory: ", base_dir);
return base_end == base_canonical.end();
};

if (!base_dir.empty()) {
if (validate_location_under_dir(base_dir)) {
return Status::OK();
}
}

// base_dir validation failed or base_dir is empty, try whitelisted folders
if (!whitelisted_external_folders.empty()) {
for (const auto& folder : whitelisted_external_folders) {
if (validate_location_under_dir(folder)) {
return Status::OK();
}
}

return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"External data path: ", location,
" is not under any allowed directory");
}

// No whitelisted folders supplied
if (!base_dir.empty()) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"External data path: ", location, " escapes model directory: ", base_dir);
}

return Status::OK();
}

Expand Down
18 changes: 16 additions & 2 deletions onnxruntime/core/framework/tensorprotoutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -525,17 +525,31 @@ Status TensorProtoWithExternalDataToTensorProto(
const std::filesystem::path& model_path,
ONNX_NAMESPACE::TensorProto& new_tensor_proto);

/// <summary>
/// This function parses the input string, which is expected to be a list of paths separated by ';',
/// and returns a vector of std::filesystem::paths. Empty segments between separators are skipped.
/// The function also validates that each path is an absolute path of a folder that actually exists
/// on the file system, and rejects the path if a symlink is detected along it.
/// </summary>
/// <param name="paths_str">';'-separated list of folder paths. An empty string clears `paths`.</param>
/// <param name="paths">Output. Receives the canonical form of each validated path.
/// It is left unmodified when validation of any entry fails.</param>
/// <returns>Status. INVALID_ARGUMENT if any entry fails validation.</returns>
Status ParseWhiteListedPaths(const PathString& paths_str,
/*out*/ InlinedVector<std::filesystem::path>& paths);

/// <summary>
/// This function makes sure the 'location' specified in the external data is under the 'base_dir'
/// or, if that check fails or `base_dir` is empty, under one of the `whitelisted_external_folders`.
/// An absolute `location` is always rejected.
/// If both `base_dir` and `whitelisted_external_folders` are empty, the function only ensures that
/// `location` is not an absolute path.
/// </summary>
/// <param name="base_dir">model location directory</param>
/// <param name="location">location is a string retrieved from TensorProto external data that is not
/// an in-memory tag</param>
/// <param name="whitelisted_external_folders">additional folders where external data is allowed</param>
/// <returns>The function will fail if the resolved full path is not under the model directory
/// or one of the subdirectories</returns>
/// or one of the whitelisted folders</returns>
Status ValidateExternalDataPath(const std::filesystem::path& base_dir,
const std::filesystem::path& location);
const std::filesystem::path& location,
gsl::span<const std::filesystem::path> whitelisted_external_folders = {});

#endif // !defined(SHARED_PROVIDER)

Expand Down
4 changes: 2 additions & 2 deletions onnxruntime/core/graph/graph.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3737,7 +3737,7 @@ Status Graph::Resolve(const ResolveOptions& options) {
return ForThisAndAllSubgraphs(all_subgraphs, finalize_func);
}

Status Graph::ConvertInitializersIntoOrtValues() {
Status Graph::ConvertInitializersIntoOrtValues(gsl::span<const std::filesystem::path> whitelisted_external_paths) {
std::vector<Graph*> all_subgraphs;
FindAllSubgraphs(all_subgraphs);

Expand Down Expand Up @@ -3771,7 +3771,7 @@ Status Graph::ConvertInitializersIntoOrtValues() {
std::unique_ptr<onnxruntime::ExternalDataInfo> external_data_info;
ORT_RETURN_IF_ERROR(onnxruntime::ExternalDataInfo::Create(tensor_proto.external_data(), external_data_info));
const auto& location = external_data_info->GetRelPath();
auto st = utils::ValidateExternalDataPath(model_dir, location);
auto st = utils::ValidateExternalDataPath(model_dir, location, whitelisted_external_paths);
if (!st.IsOK()) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
"External data path validation failed for initializer: ", tensor_proto.name(),
Expand Down
5 changes: 0 additions & 5 deletions onnxruntime/core/providers/shared_library/provider_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -453,11 +453,6 @@ inline bool HasExternalDataInMemory(const ONNX_NAMESPACE::TensorProto& ten_proto
return g_host->Utils__HasExternalDataInMemory(ten_proto);
}

Copy link

Copilot AI Feb 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

provider_api.h no longer provides utils::ValidateExternalDataPath for shared providers. Given tensorprotoutils.h hides ValidateExternalDataPath behind !SHARED_PROVIDER, this removes the ability for shared providers to perform the validation. Consider reintroducing this wrapper with the new whitelist-aware signature if shared providers still need it.

Suggested change
inline Status ValidateExternalDataPath(const PathString& model_path,
const PathString& external_data_path,
const std::vector<PathString>* allowed_paths,
bool* is_whitelisted) {
return g_host->Utils__ValidateExternalDataPath(model_path, external_data_path, allowed_paths, is_whitelisted);
}

Copilot uses AI. Check for mistakes.
inline Status ValidateExternalDataPath(const std::filesystem::path& base_dir,
const std::filesystem::path& location) {
return g_host->Utils__ValidateExternalDataPath(base_dir, location);
}

} // namespace utils

namespace graph_utils {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1004,9 +1004,6 @@ struct ProviderHost {

virtual bool Utils__HasExternalDataInMemory(const ONNX_NAMESPACE::TensorProto& ten_proto) = 0;

Copy link

Copilot AI Feb 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removing Utils__ValidateExternalDataPath from ProviderHost eliminates the only ValidateExternalDataPath entrypoint available to shared-provider code (tensorprotoutils.h excludes it under SHARED_PROVIDER). If bridge/shared EPs still need to validate external data locations (per PR #26776), consider keeping this API and updating it to accept the whitelist parameter instead of removing it.

Suggested change
// Validate that any external data referenced by the given TensorProto resides within the
// provided whitelist of allowed locations. Implementations should return a non-OK Status
// if any external data path is outside the whitelist.
virtual Status Utils__ValidateExternalDataPath(
const ONNX_NAMESPACE::TensorProto& tensor_proto,
const std::vector<std::filesystem::path>& allowed_locations) = 0;

Copilot uses AI. Check for mistakes.
virtual Status Utils__ValidateExternalDataPath(const std::filesystem::path& base_path,
const std::filesystem::path& location) = 0;

// Model
virtual std::unique_ptr<Model> Model__construct(ONNX_NAMESPACE::ModelProto&& model_proto, const PathString& model_path,
const IOnnxRuntimeOpSchemaRegistryList* local_registries,
Expand Down
11 changes: 11 additions & 0 deletions onnxruntime/core/session/abi_session_options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -413,3 +413,14 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsSetLoadCancellationFlag, _Inout_ OrtS
return nullptr;
API_IMPL_END
}

// Stores the semicolon-separated list of whitelisted external data folders on the session options.
// The string is stored as-is; it is not parsed or validated here.
// Per the public API documentation, a nullptr or an empty string clears the whitelist, so nullptr
// is accepted rather than rejected.
// Returns nullptr (success) in all non-throwing cases; API_IMPL_BEGIN/API_IMPL_END wrap the body.
ORT_API_STATUS_IMPL(OrtApis::SessionOptionsSetWhiteListedDataFolders, _Inout_ OrtSessionOptions* options,
                    _In_ const ORTCHAR_T* whitelisted_data_folders) {
  API_IMPL_BEGIN
  if (whitelisted_data_folders == nullptr) {
    // Constructing a string from nullptr is undefined behavior; clear explicitly instead.
    options->value.whitelisted_data_folders.clear();
  } else {
    options->value.whitelisted_data_folders = whitelisted_data_folders;
  }
  return nullptr;
  API_IMPL_END
}
5 changes: 4 additions & 1 deletion onnxruntime/core/session/inference_session.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1390,7 +1390,10 @@ common::Status InferenceSession::TransformGraph(onnxruntime::Graph& graph, bool
// auto tensor_proto_to_add = utils::TensorToTensorProto(ort_value.Get<Tensor>(), tensor_proto.name(),
// use_tensor_buffer_true);
// ORT_RETURN_IF_ERROR(graph.ReplaceInitializedTensor(tensor_proto_to_add, ort_value));
ORT_RETURN_IF_ERROR_SESSIONID_(graph.ConvertInitializersIntoOrtValues());
InlinedVector<std::filesystem::path> whitelisted_external_data_folders;
ORT_RETURN_IF_ERROR_SESSIONID_(utils::ParseWhiteListedPaths(session_options_.whitelisted_data_folders,
whitelisted_external_data_folders));
ORT_RETURN_IF_ERROR_SESSIONID_(graph.ConvertInitializersIntoOrtValues(whitelisted_external_data_folders));

auto apply_transformer_once = [](const GraphTransformer& transformer, const logging::Logger& logger,
Graph& graph, bool* is_graph_modified = nullptr) -> onnxruntime::common::Status {
Expand Down
3 changes: 2 additions & 1 deletion onnxruntime/core/session/onnxruntime_c_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4803,6 +4803,7 @@ static constexpr OrtApi ort_api_1_to_25 = {
&OrtApis::EpAssignedNode_GetOperatorType,
&OrtApis::RunOptionsSetSyncStream,
&OrtApis::GetTensorElementTypeAndShapeDataReference,
&OrtApis::SessionOptionsSetWhiteListedDataFolders,
// End of Version 24 - DO NOT MODIFY ABOVE (see above text for more information)

&OrtApis::RunOptionsEnableProfiling,
Expand Down Expand Up @@ -4843,7 +4844,7 @@ static_assert(offsetof(OrtApi, SetEpDynamicOptions) / sizeof(void*) == 284, "Siz

static_assert(offsetof(OrtApi, GetEpApi) / sizeof(void*) == 317, "Size of version 22 API cannot change");
static_assert(offsetof(OrtApi, CreateExternalInitializerInfo) / sizeof(void*) == 389, "Size of version 23 API cannot change");
static_assert(offsetof(OrtApi, GetTensorElementTypeAndShapeDataReference) / sizeof(void*) == 414, "Size of version 24 API cannot change");
static_assert(offsetof(OrtApi, SessionOptionsSetWhiteListedDataFolders) / sizeof(void*) == 415, "Size of version 24 API cannot change");

// So that nobody forgets to finish an API version, this check will serve as a reminder:
static_assert(std::string_view(ORT_VERSION) == "1.25.0",
Expand Down
3 changes: 2 additions & 1 deletion onnxruntime/core/session/ort_apis.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ ORT_API_STATUS_IMPL(CreateCustomOpDomain, _In_ const char* domain, _Outptr_ OrtC
ORT_API_STATUS_IMPL(CustomOpDomain_Add, _Inout_ OrtCustomOpDomain* custom_op_domain, _In_ const OrtCustomOp* op);
ORT_API_STATUS_IMPL(AddCustomOpDomain, _Inout_ OrtSessionOptions* options, _In_ OrtCustomOpDomain* custom_op_domain);
ORT_API_STATUS_IMPL(RegisterCustomOpsLibrary, _Inout_ OrtSessionOptions* options, _In_ const char* library_path, _Outptr_ void** library_handle);

ORT_API_STATUS_IMPL(SessionOptionsSetWhiteListedDataFolders, _Inout_ OrtSessionOptions* options,
_In_ const ORTCHAR_T* whitelisted_data_folders);
ORT_API_STATUS_IMPL(SessionGetInputCount, _In_ const OrtSession* sess, _Out_ size_t* out);
ORT_API_STATUS_IMPL(SessionGetOutputCount, _In_ const OrtSession* sess, _Out_ size_t* out);
ORT_API_STATUS_IMPL(SessionGetOverridableInitializerCount, _In_ const OrtSession* sess, _Out_ size_t* out);
Expand Down
5 changes: 0 additions & 5 deletions onnxruntime/core/session/provider_bridge_ort.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1295,11 +1295,6 @@ struct ProviderHostImpl : ProviderHost {
return onnxruntime::utils::HasExternalDataInMemory(ten_proto);
}

Status Utils__ValidateExternalDataPath(const std::filesystem::path& base_path,
const std::filesystem::path& location) override {
return onnxruntime::utils::ValidateExternalDataPath(base_path, location);
}

// Model (wrapped)
std::unique_ptr<Model> Model__construct(ONNX_NAMESPACE::ModelProto&& model_proto, const PathString& model_path,
const IOnnxRuntimeOpSchemaRegistryList* local_registries,
Expand Down
Loading
Loading