Skip to content

Commit 5f39cbe

Browse files
Shefeek JinnahShefeek Jinnah
authored andcommitted
Simplyfying queries
1 parent b1ca86f commit 5f39cbe

3 files changed

Lines changed: 12 additions & 133 deletions

File tree

src/metadata_provider.rs

Lines changed: 4 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -69,44 +69,21 @@ pub const SQL_TABLE_EXISTS: &str = "SELECT EXISTS(
6969

7070
// Queries for table_changes (CDC) - files added/removed between snapshots
7171

72-
/// Get data files added between two snapshots (for INSERT changes)
73-
/// Returns files where begin_snapshot > start_snapshot AND begin_snapshot <= end_snapshot
7472
pub const SQL_GET_DATA_FILES_ADDED_BETWEEN_SNAPSHOTS: &str = "
75-
SELECT
76-
data.data_file_id,
77-
data.path,
78-
data.path_is_relative,
79-
data.file_size_bytes,
80-
data.footer_size,
81-
data.begin_snapshot
73+
SELECT data.begin_snapshot
8274
FROM ducklake_data_file AS data
8375
WHERE data.table_id = ?
8476
AND data.begin_snapshot > ?
8577
AND data.begin_snapshot <= ?
86-
ORDER BY data.begin_snapshot, data.data_file_id";
78+
ORDER BY data.begin_snapshot";
8779

88-
/// Get delete files added between two snapshots (for DELETE changes)
89-
/// Returns delete files where begin_snapshot > start_snapshot AND begin_snapshot <= end_snapshot
9080
pub const SQL_GET_DELETE_FILES_ADDED_BETWEEN_SNAPSHOTS: &str = "
91-
SELECT
92-
del.delete_file_id,
93-
del.data_file_id,
94-
del.path,
95-
del.path_is_relative,
96-
del.file_size_bytes,
97-
del.footer_size,
98-
del.delete_count,
99-
del.begin_snapshot,
100-
data.path AS data_file_path,
101-
data.path_is_relative AS data_path_is_relative,
102-
data.file_size_bytes AS data_file_size,
103-
data.footer_size AS data_footer_size
81+
SELECT del.begin_snapshot
10482
FROM ducklake_delete_file AS del
105-
JOIN ducklake_data_file AS data ON del.data_file_id = data.data_file_id
10683
WHERE del.table_id = ?
10784
AND del.begin_snapshot > ?
10885
AND del.begin_snapshot <= ?
109-
ORDER BY del.begin_snapshot, del.delete_file_id";
86+
ORDER BY del.begin_snapshot";
11087

11188
// Bulk queries for information_schema (avoids N+1 query problem)
11289

@@ -315,19 +292,12 @@ impl DuckLakeTableFile {
315292

316293
#[derive(Debug, Clone)]
317294
pub struct DataFileChange {
318-
pub data_file_id: i64,
319-
pub file: DuckLakeFileData,
320295
pub begin_snapshot: i64,
321296
}
322297

323298
#[derive(Debug, Clone)]
324299
pub struct DeleteFileChange {
325-
pub delete_file_id: i64,
326-
pub data_file_id: i64,
327-
pub delete_file: DuckLakeFileData,
328-
pub delete_count: Option<i64>,
329300
pub begin_snapshot: i64,
330-
pub data_file: DuckLakeFileData,
331301
}
332302

333303
pub trait MetadataProvider: Send + Sync + std::fmt::Debug {

src/metadata_provider_duckdb.rs

Lines changed: 2 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -392,20 +392,8 @@ impl MetadataProvider for DuckdbMetadataProvider {
392392

393393
let files = stmt
394394
.query_map(params![table_id, start_snapshot, end_snapshot], |row| {
395-
let data_file_id: i64 = row.get(0)?;
396-
let file = DuckLakeFileData {
397-
path: row.get(1)?,
398-
path_is_relative: row.get(2)?,
399-
file_size_bytes: row.get(3)?,
400-
footer_size: row.get(4)?,
401-
encryption_key: String::new(),
402-
};
403-
let begin_snapshot: i64 = row.get(5)?;
404-
405395
Ok(DataFileChange {
406-
data_file_id,
407-
file,
408-
begin_snapshot,
396+
begin_snapshot: row.get(0)?,
409397
})
410398
})?
411399
.collect::<Result<Vec<_>, _>>()?;
@@ -424,32 +412,8 @@ impl MetadataProvider for DuckdbMetadataProvider {
424412

425413
let files = stmt
426414
.query_map(params![table_id, start_snapshot, end_snapshot], |row| {
427-
let delete_file_id: i64 = row.get(0)?;
428-
let data_file_id: i64 = row.get(1)?;
429-
let delete_file = DuckLakeFileData {
430-
path: row.get(2)?,
431-
path_is_relative: row.get(3)?,
432-
file_size_bytes: row.get(4)?,
433-
footer_size: row.get(5)?,
434-
encryption_key: String::new(),
435-
};
436-
let delete_count: Option<i64> = row.get(6)?;
437-
let begin_snapshot: i64 = row.get(7)?;
438-
let data_file = DuckLakeFileData {
439-
path: row.get(8)?,
440-
path_is_relative: row.get(9)?,
441-
file_size_bytes: row.get(10)?,
442-
footer_size: row.get(11)?,
443-
encryption_key: String::new(),
444-
};
445-
446415
Ok(DeleteFileChange {
447-
delete_file_id,
448-
data_file_id,
449-
delete_file,
450-
delete_count,
451-
begin_snapshot,
452-
data_file,
416+
begin_snapshot: row.get(0)?,
453417
})
454418
})?
455419
.collect::<Result<Vec<_>, _>>()?;

src/metadata_provider_postgres.rs

Lines changed: 6 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -495,18 +495,12 @@ impl MetadataProvider for PostgresMetadataProvider {
495495
) -> Result<Vec<DataFileChange>> {
496496
block_on(async {
497497
let rows = sqlx::query(
498-
"SELECT
499-
data.data_file_id,
500-
data.path,
501-
data.path_is_relative,
502-
data.file_size_bytes,
503-
data.footer_size,
504-
data.begin_snapshot
498+
"SELECT data.begin_snapshot
505499
FROM ducklake_data_file AS data
506500
WHERE data.table_id = $1
507501
AND data.begin_snapshot > $2
508502
AND data.begin_snapshot <= $3
509-
ORDER BY data.begin_snapshot, data.data_file_id",
503+
ORDER BY data.begin_snapshot",
510504
)
511505
.bind(table_id)
512506
.bind(start_snapshot)
@@ -516,20 +510,8 @@ impl MetadataProvider for PostgresMetadataProvider {
516510

517511
rows.into_iter()
518512
.map(|row| {
519-
let data_file_id: i64 = row.try_get(0)?;
520-
let file = DuckLakeFileData {
521-
path: row.try_get(1)?,
522-
path_is_relative: row.try_get(2)?,
523-
file_size_bytes: row.try_get(3)?,
524-
footer_size: row.try_get(4)?,
525-
encryption_key: String::new(),
526-
};
527-
let begin_snapshot: i64 = row.try_get(5)?;
528-
529513
Ok(DataFileChange {
530-
data_file_id,
531-
file,
532-
begin_snapshot,
514+
begin_snapshot: row.try_get(0)?,
533515
})
534516
})
535517
.collect()
@@ -544,25 +526,12 @@ impl MetadataProvider for PostgresMetadataProvider {
544526
) -> Result<Vec<DeleteFileChange>> {
545527
block_on(async {
546528
let rows = sqlx::query(
547-
"SELECT
548-
del.delete_file_id,
549-
del.data_file_id,
550-
del.path,
551-
del.path_is_relative,
552-
del.file_size_bytes,
553-
del.footer_size,
554-
del.delete_count,
555-
del.begin_snapshot,
556-
data.path AS data_file_path,
557-
data.path_is_relative AS data_path_is_relative,
558-
data.file_size_bytes AS data_file_size,
559-
data.footer_size AS data_footer_size
529+
"SELECT del.begin_snapshot
560530
FROM ducklake_delete_file AS del
561-
JOIN ducklake_data_file AS data ON del.data_file_id = data.data_file_id
562531
WHERE del.table_id = $1
563532
AND del.begin_snapshot > $2
564533
AND del.begin_snapshot <= $3
565-
ORDER BY del.begin_snapshot, del.delete_file_id",
534+
ORDER BY del.begin_snapshot",
566535
)
567536
.bind(table_id)
568537
.bind(start_snapshot)
@@ -572,32 +541,8 @@ impl MetadataProvider for PostgresMetadataProvider {
572541

573542
rows.into_iter()
574543
.map(|row| {
575-
let delete_file_id: i64 = row.try_get(0)?;
576-
let data_file_id: i64 = row.try_get(1)?;
577-
let delete_file = DuckLakeFileData {
578-
path: row.try_get(2)?,
579-
path_is_relative: row.try_get(3)?,
580-
file_size_bytes: row.try_get(4)?,
581-
footer_size: row.try_get(5)?,
582-
encryption_key: String::new(),
583-
};
584-
let delete_count: Option<i64> = row.try_get(6)?;
585-
let begin_snapshot: i64 = row.try_get(7)?;
586-
let data_file = DuckLakeFileData {
587-
path: row.try_get(8)?,
588-
path_is_relative: row.try_get(9)?,
589-
file_size_bytes: row.try_get(10)?,
590-
footer_size: row.try_get(11)?,
591-
encryption_key: String::new(),
592-
};
593-
594544
Ok(DeleteFileChange {
595-
delete_file_id,
596-
data_file_id,
597-
delete_file,
598-
delete_count,
599-
begin_snapshot,
600-
data_file,
545+
begin_snapshot: row.try_get(0)?,
601546
})
602547
})
603548
.collect()

0 commit comments

Comments
 (0)