Skip to content

Commit 1ee60eb

Browse files
committed
Avoid loading files when constructing tables
This provides a performance benefit, and this setting is _largely_ used by `EagerSnapshot` to do some redundant work when opening tables. `EagerSnapshot` is being whittled down to nothing, with everything being a pass-through onto the underlying kernel in future releases. This has been tested with development workloads for oxbow and sqs-ingest, with neutral or positive performance changes.
1 parent ba407bc commit 1ee60eb

File tree

2 files changed

+23
-6
lines changed

2 files changed

+23
-6
lines changed

crates/oxbow/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ pub async fn convert(
8585
Err(e) => {
8686
info!("No Delta table at {}: {:?}", location, e);
8787
let store = logstore_for(
88-
&location,
88+
location,
8989
StorageConfig::parse_options(storage_options.unwrap_or_default())?,
9090
)?;
9191
let files = discover_parquet_files(store.object_store(None).clone()).await?;

crates/oxbow/src/lock.rs

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use deltalake::DeltaTableBuilder;
12
///
23
/// The lock module contains some simple helpers for handling table locks in DynamoDB. This is
34
/// something required of deltalake 0.16.x and earlier.
@@ -12,11 +13,12 @@ use std::collections::HashMap;
1213
///Wrapper around [deltalake::open_table] which will open the table with the appropriate storage
1314
///options needed for locking
1415
pub async fn open_table(table_uri: &str) -> deltalake::DeltaResult<deltalake::DeltaTable> {
15-
deltalake::open_table_with_storage_options(
16-
Url::parse(table_uri).expect("Fail"),
17-
storage_options(table_uri),
18-
)
19-
.await
16+
let table_url = Url::parse(table_uri).expect("Fatal error trying to parse a table URL");
17+
DeltaTableBuilder::from_url(table_url)?
18+
.without_files()
19+
.with_storage_options(storage_options(table_uri))
20+
.load()
21+
.await
2022
}
2123

2224
/// Default storage options for using with `deltalake` calls
@@ -83,3 +85,18 @@ pub async fn release(
8385
}
8486
true
8587
}
88+
89+
#[cfg(test)]
90+
mod tests {
91+
use super::*;
92+
93+
use deltalake::DeltaResult;
94+
95+
#[tokio::test]
96+
async fn test_open_table() -> DeltaResult<()> {
97+
let _table = open_table("memory://")
98+
.await
99+
.expect_err("Can't possibly load a table that doesn't exist!");
100+
Ok(())
101+
}
102+
}

0 commit comments

Comments
 (0)