Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion include/onnxruntime/core/graph/graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -1461,12 +1461,23 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
return Resolve(default_options);
}

/// <summary>
/// This function converts all the graph TensorProto initializers into OrtValues
/// and creates an in-memory external data reference for each OrtValue. It also validates external data path references.
/// </summary>
/// <param name="whitelisted_external_paths"></param>
/// <returns></returns>
Status ConvertInitializersIntoOrtValues(gsl::span<const std::filesystem::path> whitelisted_external_paths);

/// <summary>
/// This function converts all the graph TensorProto initializers into OrtValues
/// and creates an in-memory external data reference for each OrtValue.
/// External data paths are restricted to the model directory.
/// </summary>
/// <returns></returns>
Status ConvertInitializersIntoOrtValues();
Status ConvertInitializersIntoOrtValues() {
  // No extra whitelisted folders: hand an empty span to the span-taking overload.
  const gsl::span<const std::filesystem::path> no_whitelisted_paths{};
  return ConvertInitializersIntoOrtValues(no_whitelisted_paths);
}

/**
* @brief This function examines the specified initializers in the graph and converts them inline
Expand Down
17 changes: 17 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -7221,6 +7221,23 @@ struct OrtApi {
_Outptr_result_maybenull_ const int64_t** shape_data,
_Out_ size_t* shape_data_count);

/** \brief Set whitelisted data folders for external data loading.
*
* Sets a semicolon-separated list of absolute directory paths that are allowed as sources
* for external data. Each path must be an absolute path to an existing directory and must not
* be a symbolic link.
*
* \param[in] options Session options instance.
* \param[in] whitelisted_data_folders Semicolon-separated list of absolute directory paths, or
* an empty string to clear the whitelist. This pointer must not be NULL.
*
* \return nullptr on success, or an OrtStatus on failure.
*
* \since Version 1.24.
*/
ORT_API2_STATUS(SessionOptionsSetWhiteListedDataFolders, _Inout_ OrtSessionOptions* options,
_In_ const ORTCHAR_T* whitelisted_data_folders);

/** \brief Enable profiling for this run
*
* \param[in] options
Expand Down
2 changes: 2 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_cxx_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -1558,6 +1558,8 @@ struct SessionOptionsImpl : ConstSessionOptionsImpl<T> {

///< Wraps OrtApi::AddFreeDimensionOverrideByName
SessionOptionsImpl& AddFreeDimensionOverrideByName(const char* dim_name, int64_t dim_value);
///< Wraps OrtApi::SessionOptionsSetWhiteListedDataFolders
SessionOptionsImpl& SetWhiteListedDataFolders(const ORTCHAR_T* whitelisted_data_folders);
};
} // namespace detail

Expand Down
6 changes: 6 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_cxx_inline.h
Original file line number Diff line number Diff line change
Expand Up @@ -1304,6 +1304,12 @@ inline SessionOptionsImpl<T>& SessionOptionsImpl<T>::SetLoadCancellationFlag(boo
return *this;
}

// Forwards to OrtApi::SessionOptionsSetWhiteListedDataFolders and passes the resulting
// status to ThrowOnError. Returns *this so calls can be chained.
template <typename T>
inline SessionOptionsImpl<T>& SessionOptionsImpl<T>::SetWhiteListedDataFolders(const ORTCHAR_T* whitelisted_data_folders) {
  auto status = GetApi().SessionOptionsSetWhiteListedDataFolders(this->p_, whitelisted_data_folders);
  ThrowOnError(status);
  return *this;
}

template <typename T>
inline SessionOptionsImpl<T>& SessionOptionsImpl<T>::SetLogId(const char* logid) {
ThrowOnError(GetApi().SetSessionLogId(this->p_, logid));
Expand Down
4 changes: 4 additions & 0 deletions onnxruntime/core/framework/session_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,10 @@ struct SessionOptions {
bool has_explicit_ep_context_gen_options = false;
epctx::ModelGenOptions ep_context_gen_options = {};
epctx::ModelGenOptions GetEpContextGenerationOptions() const;

// Semicolon-separated list of whitelisted data folder paths.
// Used to restrict where external data can be loaded from.
PathString whitelisted_data_folders;
};

inline std::ostream& operator<<(std::ostream& os, const SessionOptions& session_options) {
Expand Down
118 changes: 108 additions & 10 deletions onnxruntime/core/framework/tensorprotoutils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -328,24 +328,122 @@ Status TensorProtoWithExternalDataToTensorProto(
return Status::OK();
}

// Parses a ';'-separated list of whitelisted external-data folders.
//
// Every non-empty entry must be an absolute path to an existing directory, and no component of
// the path *as written* may be a symbolic link. Empty entries (e.g. "a;;b" or a trailing ';')
// are skipped.
//
// paths_str: the raw list; an empty string clears `paths`.
// paths:     on success, replaced with the canonical form of each accepted entry. It is only
//            assigned after every entry has been accepted.
// Returns INVALID_ARGUMENT naming the offending entry if any entry is rejected.
Status ParseWhiteListedPaths(const PathString& paths_str,
                             /*out*/ InlinedVector<std::filesystem::path>& paths) {
  if (paths_str.empty()) {
    paths.clear();
    return Status::OK();
  }

  InlinedVector<std::filesystem::path> result;

  auto process_path = [&](const PathString& p_str) -> Status {
    if (p_str.empty()) return Status::OK();
    std::filesystem::path path(p_str);
    std::error_code ec;
    if (!path.is_absolute()) {
      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
                             "Whitelisted data path is not absolute: ", path.string());
    }
    // canonical() resolves all symlinks and requires the path to exist.
    // If it fails, the path either doesn't exist or can't be resolved.
    auto canonical_path = std::filesystem::canonical(path, ec);
    if (ec) {
      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
                             "Whitelisted data path does not exist or cannot be resolved: ", path.string());
    }
    // Walk the prefixes of the path as supplied and reject the entry if any of them is a symlink.
    //
    // Comparing canonical() with weakly_canonical() cannot detect symlinks: on Windows (MSVC's
    // <filesystem>) both resolve existing components through the same Win32 API
    // (GetFinalPathNameByHandle), so they always compare equal.
    //
    // The walk deliberately uses the original path rather than lexically_normal(). Lexical
    // normalization deletes "name/.." pairs textually, whereas the OS (and canonical()) follows
    // `name` first when it is a symlink. Normalizing would therefore hide the symlink in
    // "/allowed/link/.." from this check while canonical_path already points outside "/allowed".
    // "." components, ".." components and trailing separators are harmless here: none of those
    // prefixes is itself reported as a symlink.
    {
      const std::filesystem::path root = path.root_path();
      std::filesystem::path accumulated;
      for (const auto& component : path) {
        accumulated /= component;
        // Skip checking the root (e.g. "C:\" or "/") since is_symlink would fail or be meaningless.
        if (accumulated == root) {
          continue;
        }
        if (std::filesystem::is_symlink(accumulated, ec)) {
          return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
                                 "Whitelisted data path contains a symlink: ", path.string());
        }
      }
    }

    if (!std::filesystem::is_directory(canonical_path, ec) || ec) {
      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
                             "Whitelisted data path is not a directory: ", path.string());
    }
    result.push_back(canonical_path);
    return Status::OK();
  };

  constexpr PathChar kSemiColonSep = ORT_TSTR(';');

  // Split on ';' and validate each piece; the tail after the last separator is handled below.
  size_t start = 0;
  size_t end = paths_str.find(kSemiColonSep);

  while (end != PathString::npos) {
    ORT_RETURN_IF_ERROR(process_path(paths_str.substr(start, end - start)));
    start = end + 1;
    end = paths_str.find(kSemiColonSep, start);
  }
  ORT_RETURN_IF_ERROR(process_path(paths_str.substr(start)));

  paths = std::move(result);
  return Status::OK();
}

Status ValidateExternalDataPath(const std::filesystem::path& base_dir,
const std::filesystem::path& location) {
const std::filesystem::path& location,
gsl::span<const std::filesystem::path> whitelisted_external_folders) {
// Reject absolute paths
ORT_RETURN_IF(location.is_absolute(),
"Absolute paths not allowed for external data location");
if (!base_dir.empty()) {
// Resolve and verify the path stays within model directory
auto base_canonical = std::filesystem::weakly_canonical(base_dir);
// If a symlink exists, it resolves to the target path;
// so if the symlink points outside the directory it would be caught here.
auto resolved = std::filesystem::weakly_canonical(base_dir / location);
// Check that resolved path starts with base directory

auto validate_location_under_dir = [&location](const std::filesystem::path& dir) -> bool {
if (dir.empty()) {
return false;
}
auto base_canonical = std::filesystem::weakly_canonical(dir);
auto resolved = std::filesystem::weakly_canonical(dir / location);
auto [base_end, resolved_it] = std::mismatch(
base_canonical.begin(), base_canonical.end(),
resolved.begin(), resolved.end());
ORT_RETURN_IF(base_end != base_canonical.end(),
"External data path: ", location, " escapes model directory: ", base_dir);
return base_end == base_canonical.end();
};

if (!base_dir.empty()) {
if (validate_location_under_dir(base_dir)) {
return Status::OK();
}
}

// base_dir validation failed or base_dir is empty, try whitelisted folders
if (!whitelisted_external_folders.empty()) {
for (const auto& folder : whitelisted_external_folders) {
if (validate_location_under_dir(folder)) {
return Status::OK();
}
}

return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"External data path: ", location,
" is not under any allowed directory");
}

// No whitelisted folders supplied
if (!base_dir.empty()) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"External data path: ", location, " escapes model directory: ", base_dir);
}

return Status::OK();
}

Expand Down
18 changes: 16 additions & 2 deletions onnxruntime/core/framework/tensorprotoutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -525,17 +525,31 @@ Status TensorProtoWithExternalDataToTensorProto(
const std::filesystem::path& model_path,
ONNX_NAMESPACE::TensorProto& new_tensor_proto);

/// <summary>
/// This function parses the input string, which is expected to be a list of paths separated by ';',
/// and returns a vector of std::filesystem::path. The function also validates that each path is an absolute path
/// to a folder that actually exists on the file system and is not a symlink.
/// </summary>
/// <param name="paths_str"></param>
/// <param name="paths"></param>
/// <returns>Status</returns>
Status ParseWhiteListedPaths(const PathString& paths_str,
/*out*/ InlinedVector<std::filesystem::path>& paths);

/// <summary>
/// This function makes sure the 'location' specified in the external data is under the 'base_dir'.
/// If the `base_dir` is empty, the function only ensures that `location` is not an absolute path.
/// If validation fails for base_dir, the function will check against whitelisted_external_folders.
/// </summary>
/// <param name="base_dir">model location directory</param>
/// <param name="location">location is a string retrieved from TensorProto external data that is not
/// an in-memory tag</param>
/// <param name="whitelisted_external_folders">additional folders where external data is allowed</param>
/// <returns>The function will fail if the resolved full path is not under the model directory
/// or one of the subdirectories</returns>
/// or one of the whitelisted folders</returns>
Status ValidateExternalDataPath(const std::filesystem::path& base_dir,
const std::filesystem::path& location);
const std::filesystem::path& location,
gsl::span<const std::filesystem::path> whitelisted_external_folders = {});

#endif // !defined(SHARED_PROVIDER)

Expand Down
4 changes: 2 additions & 2 deletions onnxruntime/core/graph/graph.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3737,7 +3737,7 @@ Status Graph::Resolve(const ResolveOptions& options) {
return ForThisAndAllSubgraphs(all_subgraphs, finalize_func);
}

Status Graph::ConvertInitializersIntoOrtValues() {
Status Graph::ConvertInitializersIntoOrtValues(gsl::span<const std::filesystem::path> whitelisted_external_paths) {
std::vector<Graph*> all_subgraphs;
FindAllSubgraphs(all_subgraphs);

Expand Down Expand Up @@ -3771,7 +3771,7 @@ Status Graph::ConvertInitializersIntoOrtValues() {
std::unique_ptr<onnxruntime::ExternalDataInfo> external_data_info;
ORT_RETURN_IF_ERROR(onnxruntime::ExternalDataInfo::Create(tensor_proto.external_data(), external_data_info));
const auto& location = external_data_info->GetRelPath();
auto st = utils::ValidateExternalDataPath(model_dir, location);
auto st = utils::ValidateExternalDataPath(model_dir, location, whitelisted_external_paths);
if (!st.IsOK()) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
"External data path validation failed for initializer: ", tensor_proto.name(),
Expand Down
5 changes: 0 additions & 5 deletions onnxruntime/core/providers/shared_library/provider_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -453,11 +453,6 @@ inline bool HasExternalDataInMemory(const ONNX_NAMESPACE::TensorProto& ten_proto
return g_host->Utils__HasExternalDataInMemory(ten_proto);
}

Copy link

Copilot AI Feb 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

provider_api.h no longer provides utils::ValidateExternalDataPath for shared providers. Given tensorprotoutils.h hides ValidateExternalDataPath behind !SHARED_PROVIDER, this removes the ability for shared providers to perform the validation. Consider reintroducing this wrapper with the new whitelist-aware signature if shared providers still need it.

Suggested change
inline Status ValidateExternalDataPath(const PathString& model_path,
const PathString& external_data_path,
const std::vector<PathString>* allowed_paths,
bool* is_whitelisted) {
return g_host->Utils__ValidateExternalDataPath(model_path, external_data_path, allowed_paths, is_whitelisted);
}

Copilot uses AI. Check for mistakes.
inline Status ValidateExternalDataPath(const std::filesystem::path& base_dir,
const std::filesystem::path& location) {
return g_host->Utils__ValidateExternalDataPath(base_dir, location);
}

} // namespace utils

namespace graph_utils {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1004,9 +1004,6 @@ struct ProviderHost {

virtual bool Utils__HasExternalDataInMemory(const ONNX_NAMESPACE::TensorProto& ten_proto) = 0;

Copy link

Copilot AI Feb 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removing Utils__ValidateExternalDataPath from ProviderHost eliminates the only ValidateExternalDataPath entrypoint available to shared-provider code (tensorprotoutils.h excludes it under SHARED_PROVIDER). If bridge/shared EPs still need to validate external data locations (per PR #26776), consider keeping this API and updating it to accept the whitelist parameter instead of removing it.

Suggested change
// Validate that any external data referenced by the given TensorProto resides within the
// provided whitelist of allowed locations. Implementations should return a non-OK Status
// if any external data path is outside the whitelist.
virtual Status Utils__ValidateExternalDataPath(
const ONNX_NAMESPACE::TensorProto& tensor_proto,
const std::vector<std::filesystem::path>& allowed_locations) = 0;

Copilot uses AI. Check for mistakes.
virtual Status Utils__ValidateExternalDataPath(const std::filesystem::path& base_path,
const std::filesystem::path& location) = 0;

// Model
virtual std::unique_ptr<Model> Model__construct(ONNX_NAMESPACE::ModelProto&& model_proto, const PathString& model_path,
const IOnnxRuntimeOpSchemaRegistryList* local_registries,
Expand Down
11 changes: 11 additions & 0 deletions onnxruntime/core/session/abi_session_options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -413,3 +413,14 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsSetLoadCancellationFlag, _Inout_ OrtS
return nullptr;
API_IMPL_END
}

// Stores the caller-supplied whitelist string on the session options as given; no parsing or
// validation of the individual paths happens here. A null pointer is rejected with
// ORT_INVALID_ARGUMENT.
ORT_API_STATUS_IMPL(OrtApis::SessionOptionsSetWhiteListedDataFolders, _Inout_ OrtSessionOptions* options,
                    _In_ const ORTCHAR_T* whitelisted_data_folders) {
  API_IMPL_BEGIN
  if (whitelisted_data_folders != nullptr) {
    options->value.whitelisted_data_folders = whitelisted_data_folders;
    return nullptr;
  }
  return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "Input whitelisted_data_folders is nullptr");
  API_IMPL_END
}
5 changes: 4 additions & 1 deletion onnxruntime/core/session/inference_session.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1390,7 +1390,10 @@ common::Status InferenceSession::TransformGraph(onnxruntime::Graph& graph, bool
// auto tensor_proto_to_add = utils::TensorToTensorProto(ort_value.Get<Tensor>(), tensor_proto.name(),
// use_tensor_buffer_true);
// ORT_RETURN_IF_ERROR(graph.ReplaceInitializedTensor(tensor_proto_to_add, ort_value));
ORT_RETURN_IF_ERROR_SESSIONID_(graph.ConvertInitializersIntoOrtValues());
InlinedVector<std::filesystem::path> whitelisted_external_data_folders;
ORT_RETURN_IF_ERROR_SESSIONID_(utils::ParseWhiteListedPaths(session_options_.whitelisted_data_folders,
whitelisted_external_data_folders));
ORT_RETURN_IF_ERROR_SESSIONID_(graph.ConvertInitializersIntoOrtValues(whitelisted_external_data_folders));

auto apply_transformer_once = [](const GraphTransformer& transformer, const logging::Logger& logger,
Graph& graph, bool* is_graph_modified = nullptr) -> onnxruntime::common::Status {
Expand Down
3 changes: 2 additions & 1 deletion onnxruntime/core/session/onnxruntime_c_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4803,6 +4803,7 @@ static constexpr OrtApi ort_api_1_to_25 = {
&OrtApis::EpAssignedNode_GetOperatorType,
&OrtApis::RunOptionsSetSyncStream,
&OrtApis::GetTensorElementTypeAndShapeDataReference,
&OrtApis::SessionOptionsSetWhiteListedDataFolders,
// End of Version 24 - DO NOT MODIFY ABOVE (see above text for more information)

&OrtApis::RunOptionsEnableProfiling,
Expand Down Expand Up @@ -4843,7 +4844,7 @@ static_assert(offsetof(OrtApi, SetEpDynamicOptions) / sizeof(void*) == 284, "Siz

static_assert(offsetof(OrtApi, GetEpApi) / sizeof(void*) == 317, "Size of version 22 API cannot change");
static_assert(offsetof(OrtApi, CreateExternalInitializerInfo) / sizeof(void*) == 389, "Size of version 23 API cannot change");
static_assert(offsetof(OrtApi, GetTensorElementTypeAndShapeDataReference) / sizeof(void*) == 414, "Size of version 24 API cannot change");
static_assert(offsetof(OrtApi, SessionOptionsSetWhiteListedDataFolders) / sizeof(void*) == 415, "Size of version 24 API cannot change");

// So that nobody forgets to finish an API version, this check will serve as a reminder:
static_assert(std::string_view(ORT_VERSION) == "1.25.0",
Expand Down
3 changes: 2 additions & 1 deletion onnxruntime/core/session/ort_apis.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ ORT_API_STATUS_IMPL(CreateCustomOpDomain, _In_ const char* domain, _Outptr_ OrtC
ORT_API_STATUS_IMPL(CustomOpDomain_Add, _Inout_ OrtCustomOpDomain* custom_op_domain, _In_ const OrtCustomOp* op);
ORT_API_STATUS_IMPL(AddCustomOpDomain, _Inout_ OrtSessionOptions* options, _In_ OrtCustomOpDomain* custom_op_domain);
ORT_API_STATUS_IMPL(RegisterCustomOpsLibrary, _Inout_ OrtSessionOptions* options, _In_ const char* library_path, _Outptr_ void** library_handle);

ORT_API_STATUS_IMPL(SessionOptionsSetWhiteListedDataFolders, _Inout_ OrtSessionOptions* options,
_In_ const ORTCHAR_T* whitelisted_data_folders);
ORT_API_STATUS_IMPL(SessionGetInputCount, _In_ const OrtSession* sess, _Out_ size_t* out);
ORT_API_STATUS_IMPL(SessionGetOutputCount, _In_ const OrtSession* sess, _Out_ size_t* out);
ORT_API_STATUS_IMPL(SessionGetOverridableInitializerCount, _In_ const OrtSession* sess, _Out_ size_t* out);
Expand Down
5 changes: 0 additions & 5 deletions onnxruntime/core/session/provider_bridge_ort.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1295,11 +1295,6 @@ struct ProviderHostImpl : ProviderHost {
return onnxruntime::utils::HasExternalDataInMemory(ten_proto);
}

Status Utils__ValidateExternalDataPath(const std::filesystem::path& base_path,
const std::filesystem::path& location) override {
return onnxruntime::utils::ValidateExternalDataPath(base_path, location);
}

// Model (wrapped)
std::unique_ptr<Model> Model__construct(ONNX_NAMESPACE::ModelProto&& model_proto, const PathString& model_path,
const IOnnxRuntimeOpSchemaRegistryList* local_registries,
Expand Down
Loading
Loading