Skip to content

Commit 424ac81

Browse files
authored
Merge pull request #70 from hotdata-dev/fix/empty-catalog
fix: handle empty catalogs where data directory does not exist
2 parents 3be818e + ffa547d commit 424ac81

1 file changed

Lines changed: 44 additions & 10 deletions

File tree

src/path_resolver.rs

Lines changed: 44 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -174,13 +174,23 @@ fn parse_file_url(data_path: &str) -> Result<(ObjectStoreUrl, String)> {
174174

175175
/// Parse a local filesystem path into ObjectStoreUrl and key path
176176
fn parse_local_path(data_path: &str) -> Result<(ObjectStoreUrl, String)> {
177+
// Reject paths containing ".." to prevent directory traversal attacks.
178+
// std::path::absolute() does NOT normalize ".." components, so a malicious
179+
// data_path like "/data/../../../etc/" would pass through unchanged.
180+
if data_path.split(['/', '\\']).any(|c| c == "..") {
181+
return Err(DuckLakeError::InvalidConfig(
182+
"Path contains '..' traversal component".to_string(),
183+
));
184+
}
185+
177186
let has_trailing_slash = data_path.ends_with('/') || data_path.ends_with('\\');
178187

179-
let absolute_path = std::path::PathBuf::from(data_path)
180-
.canonicalize()
181-
.map_err(|e| {
182-
DuckLakeError::InvalidConfig(format!("Failed to resolve path '{}': {}", data_path, e))
183-
})?;
188+
// Use std::path::absolute() instead of canonicalize() so that the path
189+
// does not need to exist on disk yet. This supports empty catalogs where
190+
// the data directory has not been created (#57).
191+
let absolute_path = std::path::absolute(data_path).map_err(|e| {
192+
DuckLakeError::InvalidConfig(format!("Failed to resolve path '{}': {}", data_path, e))
193+
})?;
184194

185195
let object_store_url = ObjectStoreUrl::parse("file:///").map_err(|e| {
186196
DuckLakeError::InvalidConfig(format!("Failed to create ObjectStoreUrl: {}", e))
@@ -523,16 +533,40 @@ mod tests {
523533

524534
#[test]
525535
fn test_parse_local_path_nonexistent() {
526-
let result = parse_object_store_url("/nonexistent/path/that/does/not/exist");
536+
// Non-existent paths should now succeed (#57) - the directory does not
537+
// need to exist at catalog creation time (e.g., empty catalogs).
538+
let (url, path) = parse_object_store_url("/nonexistent/path/that/does/not/exist").unwrap();
539+
assert_eq!(url, ObjectStoreUrl::parse("file:///").unwrap());
540+
assert_eq!(path, "/nonexistent/path/that/does/not/exist");
541+
}
542+
543+
#[test]
544+
fn test_parse_local_path_nonexistent_with_trailing_slash() {
545+
let (url, path) = parse_object_store_url("/nonexistent/data/path/").unwrap();
546+
assert_eq!(url, ObjectStoreUrl::parse("file:///").unwrap());
547+
assert_eq!(path, "/nonexistent/data/path/");
548+
}
549+
550+
#[test]
551+
fn test_reject_dotdot_in_local_path() {
552+
// Local paths with ".." must be rejected since std::path::absolute()
553+
// does not normalize them away.
554+
let result = parse_object_store_url("/data/../../../etc/passwd");
527555
assert!(result.is_err());
556+
let err = result.unwrap_err().to_string();
528557
assert!(
529-
result
530-
.unwrap_err()
531-
.to_string()
532-
.contains("Failed to resolve path")
558+
err.contains("'..'"),
559+
"error should mention '..' traversal: {}",
560+
err
533561
);
534562
}
535563

564+
#[test]
565+
fn test_reject_dotdot_in_local_path_backslash() {
566+
let result = parse_object_store_url("/data\\..\\secret");
567+
assert!(result.is_err());
568+
}
569+
536570
#[test]
537571
fn test_join_paths_nested_relative() {
538572
assert_eq!(

0 commit comments

Comments
 (0)