
Commit 7333a36

feat: enable file merging by last modification time using preserve-insertion-order
This change leverages the previously unused `preserve-insertion-order` configuration to merge files sorted by their last modification time during compaction. This is particularly beneficial for append-only workloads: files that were created around the same time are merged together, improving data locality after optimize runs.

Signed-off-by: esarili <[email protected]>

1 parent: f67e828
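For context, a minimal sketch of how a caller would opt into the new behavior. The table path is hypothetical and the error handling is simplified; the builder methods are the ones exercised by the diff and test below:

```rust
use std::error::Error;

use deltalake::DeltaOps;

#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
    // Hypothetical path; any existing Delta table would do.
    let table = deltalake::open_table("./data/append_only_events").await?;

    // Compact with files ordered by last modification time (newest first)
    // instead of the default size-based ordering.
    let (_table, metrics) = DeltaOps(table)
        .optimize()
        .with_preserve_insertion_order(true)
        .await?;

    println!(
        "files added: {}, files removed: {}",
        metrics.num_files_added, metrics.num_files_removed
    );
    Ok(())
}
```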

File tree: 2 files changed, +71 -3 lines changed


crates/core/src/operations/optimize.rs (+13 -3)
@@ -331,6 +331,7 @@ impl<'a> std::future::IntoFuture for OptimizeBuilder<'a> {
             this.filters,
             this.target_size.to_owned(),
             writer_properties,
+            this.preserve_insertion_order,
         )?;
         let metrics = plan
             .execute(
@@ -877,12 +878,15 @@ pub fn create_merge_plan(
     filters: &[PartitionFilter],
     target_size: Option<i64>,
     writer_properties: WriterProperties,
+    preserve_insertion_order: bool,
 ) -> Result<MergePlan, DeltaTableError> {
     let target_size = target_size.unwrap_or_else(|| snapshot.table_config().target_file_size());
     let partitions_keys = &snapshot.metadata().partition_columns;

     let (operations, metrics) = match optimize_type {
-        OptimizeType::Compact => build_compaction_plan(snapshot, filters, target_size)?,
+        OptimizeType::Compact => {
+            build_compaction_plan(snapshot, filters, target_size, preserve_insertion_order)?
+        }
         OptimizeType::ZOrder(zorder_columns) => {
             build_zorder_plan(zorder_columns, snapshot, partitions_keys, filters)?
         }
@@ -958,6 +962,7 @@ fn build_compaction_plan(
     snapshot: &DeltaTableState,
     filters: &[PartitionFilter],
     target_size: i64,
+    preserve_insertion_order: bool,
 ) -> Result<(OptimizeOperations, Metrics), DeltaTableError> {
     let mut metrics = Metrics::default();

@@ -985,8 +990,13 @@ fn build_compaction_plan(
     }

     for (_, file) in partition_files.values_mut() {
-        // Sort files by size: largest to smallest
-        file.sort_by(|a, b| b.size.cmp(&a.size));
+        if preserve_insertion_order {
+            // Sort files by last modified time: newest to oldest
+            file.sort_by(|a, b| b.last_modified.cmp(&a.last_modified));
+        } else {
+            // Sort files by size: largest to smallest
+            file.sort_by(|a, b| b.size.cmp(&a.size));
+        }
     }

     let mut operations: HashMap<String, (IndexMap<String, Scalar>, Vec<MergeBin>)> = HashMap::new();
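To make the two orderings concrete, here is a standalone sketch of the comparators. The `FileStats` struct and sample values are hypothetical stand-ins for the per-file metadata that `build_compaction_plan` actually sorts:

```rust
// Hypothetical stand-in for the per-file metadata sorted in build_compaction_plan.
#[derive(Debug)]
struct FileStats {
    path: &'static str,
    size: i64,
    last_modified: i64, // epoch millis
}

fn main() {
    let mut files = vec![
        FileStats { path: "part-0001.parquet", size: 4096, last_modified: 1_653_177_600_000 },
        FileStats { path: "part-0002.parquet", size: 1024, last_modified: 1_653_264_000_000 },
    ];

    // preserve_insertion_order == true: descending by last_modified,
    // so the most recently written file is merged first.
    files.sort_by(|a, b| b.last_modified.cmp(&a.last_modified));
    assert_eq!(files[0].path, "part-0002.parquet");

    // preserve_insertion_order == false: descending by size (the previous behavior).
    files.sort_by(|a, b| b.size.cmp(&a.size));
    assert_eq!(files[0].path, "part-0001.parquet");
}
```

This also explains the expected row order in the new test below: the later file's rows appear first in the merged output.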

crates/core/tests/command_optimize.rs (+58)
@@ -289,6 +289,7 @@ async fn test_conflict_for_remove_actions() -> Result<(), Box<dyn Error>> {
         &filter,
         None,
         WriterProperties::builder().build(),
+        false,
     )?;

     let uri = context.tmp_dir.path().to_str().to_owned().unwrap();
@@ -351,6 +352,7 @@ async fn test_no_conflict_for_append_actions() -> Result<(), Box<dyn Error>> {
         &filter,
         None,
         WriterProperties::builder().build(),
+        false,
     )?;

     let uri = context.tmp_dir.path().to_str().to_owned().unwrap();
@@ -410,6 +412,7 @@ async fn test_commit_interval() -> Result<(), Box<dyn Error>> {
         &[],
         None,
         WriterProperties::builder().build(),
+        false,
     )?;

     let metrics = plan
@@ -867,6 +870,61 @@ async fn test_zorder_respects_target_size() -> Result<(), Box<dyn Error>> {
     Ok(())
 }

+#[tokio::test]
+async fn test_preserve_insertion_order() -> Result<(), Box<dyn Error>> {
+    let context = setup_test(true).await?;
+    let mut dt = context.table;
+    let mut writer = RecordBatchWriter::for_table(&dt)?;
+
+    // first file
+    write(
+        &mut writer,
+        &mut dt,
+        tuples_to_batch(vec![(1, 1), (1, 2), (1, 3), (1, 4)], "2022-05-22")?,
+    )
+    .await?;
+
+    // later file
+    write(
+        &mut writer,
+        &mut dt,
+        tuples_to_batch(vec![(2, 5), (2, 6), (2, 7), (2, 8)], "2022-05-22")?,
+    )
+    .await?;
+
+    let filter = vec![PartitionFilter::try_from(("date", "=", "2022-05-22"))?];
+
+    let optimize = DeltaOps(dt)
+        .optimize()
+        .with_target_size(2_000_000)
+        .with_filters(&filter)
+        .with_preserve_insertion_order(true);
+    let (dt, metrics) = optimize.await?;
+
+    assert_eq!(metrics.num_files_added, 1);
+    assert_eq!(metrics.num_files_removed, 2);
+    assert_eq!(metrics.total_files_skipped, 0);
+    assert_eq!(metrics.total_considered_files, 2);
+
+    // Check data
+    let files = dt.get_files_iter()?.collect::<Vec<_>>();
+    assert_eq!(files.len(), 1);
+
+    let actual = read_parquet_file(&files[0], dt.object_store()).await?;
+    let expected = RecordBatch::try_new(
+        actual.schema(),
+        // file created later is merged first
+        vec![
+            Arc::new(Int32Array::from(vec![2, 2, 2, 2, 1, 1, 1, 1])),
+            Arc::new(Int32Array::from(vec![5, 6, 7, 8, 1, 2, 3, 4])),
+        ],
+    )?;
+
+    assert_eq!(actual, expected);
+
+    Ok(())
+}
+
 async fn read_parquet_file(
     path: &Path,
     object_store: ObjectStoreRef,
