Skip to content

Commit 0f9147c

Browse files
committed
Test validate path
1 parent 4f70b56 commit 0f9147c

File tree

10 files changed

+135
-31
lines changed

10 files changed

+135
-31
lines changed

include/onnxruntime/core/graph/graph.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1463,10 +1463,11 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
14631463

14641464
/// <summary>
14651465
/// This function converts all the graph TensorProto initializers into OrtValues
1466-
/// and creates a in-memory external data reference for each OrtValue.
1466+
/// and creates a in-memory external data reference for each OrtValue. It validates external paths data references.
14671467
/// </summary>
1468+
/// <param name="whitelisted_external_paths"></param>
14681469
/// <returns></returns>
1469-
Status ConvertInitializersIntoOrtValues();
1470+
Status ConvertInitializersIntoOrtValues(gsl::span<const std::filesystem::path> whitelisted_external_paths);
14701471

14711472
/**
14721473
* @brief This function examines the specified initializers in the graph and converts them inline

onnxruntime/core/framework/tensorprotoutils.cc

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -401,23 +401,49 @@ Status ParseWhiteListedPaths(const PathString& paths_str,
401401
}
402402

403403
Status ValidateExternalDataPath(const std::filesystem::path& base_dir,
404-
const std::filesystem::path& location) {
404+
const std::filesystem::path& location,
405+
gsl::span<const std::filesystem::path> whitelisted_external_folders) {
405406
// Reject absolute paths
406407
ORT_RETURN_IF(location.is_absolute(),
407408
"Absolute paths not allowed for external data location");
408-
if (!base_dir.empty()) {
409-
// Resolve and verify the path stays within model directory
410-
auto base_canonical = std::filesystem::weakly_canonical(base_dir);
411-
// If the symlink exists, it resolves to the target path;
412-
// so if the symlink is outside the directory it would be caught here.
413-
auto resolved = std::filesystem::weakly_canonical(base_dir / location);
414-
// Check that resolved path starts with base directory
409+
410+
auto validate_location_under_dir = [&location](const std::filesystem::path& dir) -> bool {
411+
if (dir.empty()) {
412+
return false;
413+
}
414+
auto base_canonical = std::filesystem::weakly_canonical(dir);
415+
auto resolved = std::filesystem::weakly_canonical(dir / location);
415416
auto [base_end, resolved_it] = std::mismatch(
416417
base_canonical.begin(), base_canonical.end(),
417418
resolved.begin(), resolved.end());
418-
ORT_RETURN_IF(base_end != base_canonical.end(),
419-
"External data path: ", location, " escapes model directory: ", base_dir);
419+
return base_end == base_canonical.end();
420+
};
421+
422+
if (!base_dir.empty()) {
423+
if (validate_location_under_dir(base_dir)) {
424+
return Status::OK();
425+
}
420426
}
427+
428+
// base_dir validation failed or base_dir is empty, try whitelisted folders
429+
if (!whitelisted_external_folders.empty()) {
430+
for (const auto& folder : whitelisted_external_folders) {
431+
if (validate_location_under_dir(folder)) {
432+
return Status::OK();
433+
}
434+
}
435+
436+
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
437+
"External data path: ", location,
438+
" is not under any allowed directory");
439+
}
440+
441+
// No whitelisted folders supplied
442+
if (!base_dir.empty()) {
443+
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
444+
"External data path: ", location, " escapes model directory: ", base_dir);
445+
}
446+
421447
return Status::OK();
422448
}
423449

onnxruntime/core/framework/tensorprotoutils.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -539,14 +539,17 @@ Status ParseWhiteListedPaths(const PathString& paths_str,
539539
/// <summary>
540540
/// The functions will make sure the 'location' specified in the external data is under the 'base_dir'.
541541
/// If the `base_dir` is empty, the function only ensures that `location` is not an absolute path.
542+
/// If validation fails for base_dir, the function will check against whitelisted_external_folders.
542543
/// </summary>
543544
/// <param name="base_dir">model location directory</param>
544545
/// <param name="location">location is a string retrieved from TensorProto external data that is not
545546
/// an in-memory tag</param>
547+
/// <param name="whitelisted_external_folders">additional folders where external data is allowed</param>
546548
/// <returns>The function will fail if the resolved full path is not under the model directory
547-
/// or one of the subdirectories</returns>
549+
/// or one of the whitelisted folders</returns>
548550
Status ValidateExternalDataPath(const std::filesystem::path& base_dir,
549-
const std::filesystem::path& location);
551+
const std::filesystem::path& location,
552+
gsl::span<const std::filesystem::path> whitelisted_external_folders = {});
550553

551554
#endif // !defined(SHARED_PROVIDER)
552555

onnxruntime/core/graph/graph.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3737,7 +3737,7 @@ Status Graph::Resolve(const ResolveOptions& options) {
37373737
return ForThisAndAllSubgraphs(all_subgraphs, finalize_func);
37383738
}
37393739

3740-
Status Graph::ConvertInitializersIntoOrtValues() {
3740+
Status Graph::ConvertInitializersIntoOrtValues(gsl::span<const std::filesystem::path> whitelisted_external_paths) {
37413741
std::vector<Graph*> all_subgraphs;
37423742
FindAllSubgraphs(all_subgraphs);
37433743

@@ -3771,7 +3771,7 @@ Status Graph::ConvertInitializersIntoOrtValues() {
37713771
std::unique_ptr<onnxruntime::ExternalDataInfo> external_data_info;
37723772
ORT_RETURN_IF_ERROR(onnxruntime::ExternalDataInfo::Create(tensor_proto.external_data(), external_data_info));
37733773
const auto& location = external_data_info->GetRelPath();
3774-
auto st = utils::ValidateExternalDataPath(model_dir, location);
3774+
auto st = utils::ValidateExternalDataPath(model_dir, location, whitelisted_external_paths);
37753775
if (!st.IsOK()) {
37763776
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
37773777
"External data path validation failed for initializer: ", tensor_proto.name(),

onnxruntime/core/providers/shared_library/provider_api.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -453,11 +453,6 @@ inline bool HasExternalDataInMemory(const ONNX_NAMESPACE::TensorProto& ten_proto
453453
return g_host->Utils__HasExternalDataInMemory(ten_proto);
454454
}
455455

456-
inline Status ValidateExternalDataPath(const std::filesystem::path& base_dir,
457-
const std::filesystem::path& location) {
458-
return g_host->Utils__ValidateExternalDataPath(base_dir, location);
459-
}
460-
461456
} // namespace utils
462457

463458
namespace graph_utils {

onnxruntime/core/providers/shared_library/provider_interfaces.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1004,9 +1004,6 @@ struct ProviderHost {
10041004

10051005
virtual bool Utils__HasExternalDataInMemory(const ONNX_NAMESPACE::TensorProto& ten_proto) = 0;
10061006

1007-
virtual Status Utils__ValidateExternalDataPath(const std::filesystem::path& base_path,
1008-
const std::filesystem::path& location) = 0;
1009-
10101007
// Model
10111008
virtual std::unique_ptr<Model> Model__construct(ONNX_NAMESPACE::ModelProto&& model_proto, const PathString& model_path,
10121009
const IOnnxRuntimeOpSchemaRegistryList* local_registries,

onnxruntime/core/session/inference_session.cc

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1390,7 +1390,10 @@ common::Status InferenceSession::TransformGraph(onnxruntime::Graph& graph, bool
13901390
// auto tensor_proto_to_add = utils::TensorToTensorProto(ort_value.Get<Tensor>(), tensor_proto.name(),
13911391
// use_tensor_buffer_true);
13921392
// ORT_RETURN_IF_ERROR(graph.ReplaceInitializedTensor(tensor_proto_to_add, ort_value));
1393-
ORT_RETURN_IF_ERROR_SESSIONID_(graph.ConvertInitializersIntoOrtValues());
1393+
InlinedVector<std::filesystem::path> whitelisted_external_data_folders;
1394+
ORT_RETURN_IF_ERROR_SESSIONID_(utils::ParseWhiteListedPaths(session_options_.whitelisted_data_folders,
1395+
whitelisted_external_data_folders));
1396+
ORT_RETURN_IF_ERROR_SESSIONID_(graph.ConvertInitializersIntoOrtValues(whitelisted_external_data_folders));
13941397

13951398
auto apply_transformer_once = [](const GraphTransformer& transformer, const logging::Logger& logger,
13961399
Graph& graph, bool* is_graph_modified = nullptr) -> onnxruntime::common::Status {

onnxruntime/core/session/provider_bridge_ort.cc

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1295,11 +1295,6 @@ struct ProviderHostImpl : ProviderHost {
12951295
return onnxruntime::utils::HasExternalDataInMemory(ten_proto);
12961296
}
12971297

1298-
Status Utils__ValidateExternalDataPath(const std::filesystem::path& base_path,
1299-
const std::filesystem::path& location) override {
1300-
return onnxruntime::utils::ValidateExternalDataPath(base_path, location);
1301-
}
1302-
13031298
// Model (wrapped)
13041299
std::unique_ptr<Model> Model__construct(ONNX_NAMESPACE::ModelProto&& model_proto, const PathString& model_path,
13051300
const IOnnxRuntimeOpSchemaRegistryList* local_registries,

onnxruntime/test/framework/tensorutils_test.cc

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -511,18 +511,22 @@ class PathValidationTest : public ::testing::Test {
511511
// Create a temporary directory for the tests.
512512
base_dir_ = std::filesystem::temp_directory_path() / "PathValidationTest";
513513
outside_dir_ = std::filesystem::temp_directory_path() / "outside";
514+
whitelisted_dir_ = std::filesystem::temp_directory_path() / "whitelisted";
514515
std::filesystem::create_directories(base_dir_);
515516
std::filesystem::create_directories(outside_dir_);
517+
std::filesystem::create_directories(whitelisted_dir_);
516518
}
517519

518520
void TearDown() override {
519521
// Clean up the temporary directory.
520522
std::filesystem::remove_all(base_dir_);
521523
std::filesystem::remove_all(outside_dir_);
524+
std::filesystem::remove_all(whitelisted_dir_);
522525
}
523526

524527
std::filesystem::path base_dir_;
525528
std::filesystem::path outside_dir_;
529+
std::filesystem::path whitelisted_dir_;
526530
};
527531

528532
// Test cases for ValidateExternalDataPath.
@@ -586,6 +590,85 @@ TEST_F(PathValidationTest, ValidateExternalDataPathWithSymlinkOutside) {
586590
ASSERT_FALSE(utils::ValidateExternalDataPath(base_dir_, "outside_link.bin").IsOK());
587591
}
588592

593+
TEST_F(PathValidationTest, ValidateExternalDataPathWithWhitelistedFolder) {
594+
// Path is valid under the whitelisted folder directly.
595+
std::vector<std::filesystem::path> whitelist = {outside_dir_};
596+
ASSERT_STATUS_OK(utils::ValidateExternalDataPath(outside_dir_, "data.bin", whitelist));
597+
}
598+
599+
TEST_F(PathValidationTest, ValidateExternalDataPathEscapesBaseButMatchesWhitelist) {
600+
std::vector<std::filesystem::path> whitelist = {whitelisted_dir_};
601+
// "data.bin" is valid under base_dir_, no need for whitelist
602+
ASSERT_STATUS_OK(utils::ValidateExternalDataPath(base_dir_, "data.bin", whitelist));
603+
604+
// Create a subdirectory of whitelisted_dir_ and use that as the whitelisted folder.
605+
auto whitelisted_sub = whitelisted_dir_ / "sub";
606+
std::filesystem::create_directories(whitelisted_sub);
607+
std::vector<std::filesystem::path> whitelist2 = {whitelisted_sub};
608+
609+
// "../data.bin" escapes base_dir_ and also escapes whitelisted_sub
610+
ASSERT_FALSE(utils::ValidateExternalDataPath(base_dir_, "../data.bin").IsOK());
611+
ASSERT_FALSE(utils::ValidateExternalDataPath(base_dir_, "../data.bin", whitelist2).IsOK());
612+
}
613+
614+
TEST_F(PathValidationTest, ValidateExternalDataPathWhitelistSavesEscapingPath) {
615+
// Location "../outside/data.bin" escapes base_dir_ but resolves under outside_dir_.
616+
auto relative_to_outside = std::filesystem::path("..") / "outside" / "data.bin";
617+
std::vector<std::filesystem::path> whitelist = {outside_dir_};
618+
619+
// Without whitelist, it should fail.
620+
ASSERT_FALSE(utils::ValidateExternalDataPath(base_dir_, relative_to_outside).IsOK());
621+
622+
// With whitelist containing outside_dir_, it should succeed because
623+
// outside_dir_ / "../outside/data.bin" resolves under outside_dir_.
624+
ASSERT_STATUS_OK(utils::ValidateExternalDataPath(base_dir_, relative_to_outside, whitelist));
625+
}
626+
627+
TEST_F(PathValidationTest, ValidateExternalDataPathWhitelistDoesNotMatchEither) {
628+
// Location escapes both base_dir and all whitelisted folders.
629+
auto unrelated_dir = std::filesystem::temp_directory_path() / "unrelated_PathValidationTest";
630+
std::filesystem::create_directories(unrelated_dir);
631+
auto cleanup = [&]() { std::filesystem::remove_all(unrelated_dir); };
632+
633+
std::vector<std::filesystem::path> whitelist = {whitelisted_dir_};
634+
auto escaping_location = std::filesystem::path("..") / "unrelated_PathValidationTest" / "data.bin";
635+
ASSERT_FALSE(utils::ValidateExternalDataPath(base_dir_, escaping_location, whitelist).IsOK());
636+
637+
cleanup();
638+
}
639+
640+
TEST_F(PathValidationTest, ValidateExternalDataPathEmptyWhitelist) {
641+
// Empty whitelist should behave the same as no whitelist.
642+
std::vector<std::filesystem::path> empty_whitelist;
643+
ASSERT_STATUS_OK(utils::ValidateExternalDataPath(base_dir_, "data.bin", empty_whitelist));
644+
ASSERT_FALSE(utils::ValidateExternalDataPath(base_dir_, "../data.bin", empty_whitelist).IsOK());
645+
}
646+
647+
TEST_F(PathValidationTest, ValidateExternalDataPathMultipleWhitelistedFolders) {
648+
// First whitelisted folder doesn't match, second one does.
649+
auto another_dir = std::filesystem::temp_directory_path() / "another_PathValidationTest";
650+
std::filesystem::create_directories(another_dir);
651+
auto cleanup = [&]() { std::filesystem::remove_all(another_dir); };
652+
653+
auto relative_to_outside = std::filesystem::path("..") / "outside" / "data.bin";
654+
std::vector<std::filesystem::path> whitelist = {another_dir, outside_dir_};
655+
656+
// Escapes base_dir_ but outside_dir_ (second whitelist entry) should match.
657+
ASSERT_STATUS_OK(utils::ValidateExternalDataPath(base_dir_, relative_to_outside, whitelist));
658+
659+
cleanup();
660+
}
661+
662+
TEST_F(PathValidationTest, ValidateExternalDataPathAbsoluteLocationRejectsEvenWithWhitelist) {
663+
// Absolute paths are always rejected, regardless of whitelist.
664+
std::vector<std::filesystem::path> whitelist = {outside_dir_};
665+
#ifdef _WIN32
666+
ASSERT_FALSE(utils::ValidateExternalDataPath(base_dir_, "C:\\data.bin", whitelist).IsOK());
667+
#else
668+
ASSERT_FALSE(utils::ValidateExternalDataPath(base_dir_, "/data.bin", whitelist).IsOK());
669+
#endif
670+
}
671+
589672
// Test fixture for ParseWhiteListedPaths tests.
590673
class ParseWhiteListedPathsTest : public ::testing::Test {
591674
protected:
@@ -773,5 +856,6 @@ TEST_F(ParseWhiteListedPathsTest, OutParamUnchangedOnError) {
773856
ASSERT_EQ(paths.size(), 1u);
774857
EXPECT_EQ(paths[0], sub_dir_a_);
775858
}
859+
776860
} // namespace test
777861
} // namespace onnxruntime

onnxruntime/test/ir/graph_test.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2817,7 +2817,7 @@ TEST_F(GraphTest, ShapeInferenceAfterInitializerExternalization) {
28172817
<< "We no longer externalize data in the Graph constructor.";
28182818

28192819
// Now externalize explicitly to trigger the bug scenario
2820-
ASSERT_STATUS_OK(graph.ConvertInitializersIntoOrtValues());
2820+
ASSERT_STATUS_OK(graph.ConvertInitializersIntoOrtValues({}));
28212821
ASSERT_TRUE(graph.GetInitializedTensor("split_sizes", initializer_after));
28222822
ASSERT_NE(initializer_after, nullptr);
28232823
ASSERT_TRUE(utils::HasExternalDataInMemory(*initializer_after))

0 commit comments

Comments
 (0)