Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ffi/src/transaction/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ mod tests {
// writer must be closed to write footer
let res = writer.close().unwrap();

create_file_metadata(file_path, res.num_rows, metadata_schema)
create_file_metadata(file_path, res.file_metadata().num_rows(), metadata_schema)
}

#[tokio::test]
Expand Down
15 changes: 14 additions & 1 deletion kernel/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,18 @@ version = "56"
features = ["async", "object_store"]
optional = true

# arrow 57
[dependencies.arrow_57]
package = "arrow"
version = "57"
features = ["chrono-tz", "ffi", "json", "prettyprint"]
optional = true
[dependencies.parquet_57]
package = "parquet"
version = "57"
features = ["async", "object_store"]
optional = true

[features]
# no default features
default = []
Expand All @@ -99,11 +111,12 @@ internal-api = []
integration-test = ["hdfs-native-object-store/integration-test"]

# The default versions for arrow/parquet/object_store
arrow = ["arrow-56"] # latest arrow version
arrow = ["arrow-57"] # latest arrow version
need-arrow = [] # need-arrow is a marker that the feature needs arrow dep

arrow-55 = ["dep:arrow_55", "dep:parquet_55", "object_store", "comfy-table"]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we're only committing to supporting the latest two versions, so let's remove arrow-55.

arrow-56 = ["dep:arrow_56", "dep:parquet_56", "object_store", "comfy-table"]
arrow-57 = ["dep:arrow_57", "dep:parquet_57", "object_store", "comfy-table"]
arrow-conversion = ["need-arrow"]
arrow-expression = ["need-arrow"]

Expand Down
5 changes: 3 additions & 2 deletions kernel/examples/read-table-multi-threaded/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@ edition = "2021"
publish = false

[dependencies]
arrow = { version = "56", features = ["prettyprint", "chrono-tz"] }
arrow = { version = "57", features = ["prettyprint", "chrono-tz"] }
clap = { version = "4.5", features = ["derive"] }
# common pulls in arrow latest so we have to keep all these in sync here
common = { path = "../common" }
delta_kernel = { path = "../../../kernel", features = [
"arrow-56",
"arrow",
"default-engine-rustls",
"internal-api",
] }
Expand Down
4 changes: 2 additions & 2 deletions kernel/examples/read-table-single-threaded/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ edition = "2021"
publish = false

[dependencies]
arrow = { version = "56", features = ["prettyprint", "chrono-tz"] }
arrow = { version = "57", features = ["prettyprint", "chrono-tz"] }
clap = { version = "4.5", features = ["derive"] }
common = { path = "../common" }
delta_kernel = { path = "../../../kernel", features = [
"arrow-56",
"arrow",
"default-engine-rustls",
"internal-api",
] }
Expand Down
5 changes: 3 additions & 2 deletions kernel/examples/write-table/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@ edition = "2021"
publish = false

[dependencies]
arrow = { version = "56", features = ["prettyprint", "chrono-tz"] }
arrow = { version = "57", features = ["prettyprint", "chrono-tz"] }
clap = { version = "4.5", features = ["derive"] }
# NB: common depends on 'arrow' (latest) so have to match here
common = { path = "../common" }
delta_kernel = { path = "../../../kernel", features = [
"arrow-56",
"arrow",
"default-engine-rustls",
"internal-api",
] }
Expand Down
25 changes: 20 additions & 5 deletions kernel/src/arrow_compat.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,26 @@
//! This module re-exports the different versions of arrow, parquet, and object_store we support.
#[cfg(feature = "arrow-56")]
#[cfg(feature = "arrow-57")]
mod arrow_compat_shims {
pub use arrow_57 as arrow;

Check warning on line 5 in kernel/src/arrow_compat.rs

View workflow job for this annotation

GitHub Actions / test (ubuntu-latest)

unused import: `arrow_57 as arrow`

Check warning on line 5 in kernel/src/arrow_compat.rs

View workflow job for this annotation

GitHub Actions / coverage

unused import: `arrow_57 as arrow`
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should probably turn off these warnings if they are spurious

pub use parquet_57 as parquet;

Check warning on line 6 in kernel/src/arrow_compat.rs

View workflow job for this annotation

GitHub Actions / test (ubuntu-latest)

unused import: `parquet_57 as parquet`

Check warning on line 6 in kernel/src/arrow_compat.rs

View workflow job for this annotation

GitHub Actions / coverage

unused import: `parquet_57 as parquet`
}

// Select arrow/parquet 56 whenever `arrow-56` is enabled and the newer `arrow-57` is not.
//
// `arrow-55` is deliberately NOT excluded here: when both `arrow-55` and `arrow-56` are enabled
// the newer version must win. Excluding `arrow-55` would leave no `arrow_compat_shims` module
// compiled for that feature combination, and the `pub use arrow_compat_shims::*` re-export
// (which is active whenever any arrow feature is enabled) would fail to resolve.
#[cfg(all(feature = "arrow-56", not(feature = "arrow-57")))]
mod arrow_compat_shims {
    pub use arrow_56 as arrow;
    pub use parquet_56 as parquet;
}

#[cfg(all(feature = "arrow-55", not(feature = "arrow-56")))]
#[cfg(all(
feature = "arrow-55",
not(feature = "arrow-56"),
not(feature = "arrow-57")
))]
mod arrow_compat_shims {
pub use arrow_55 as arrow;
pub use parquet_55 as parquet;
Expand All @@ -17,9 +31,10 @@
#[cfg(all(
feature = "need-arrow",
not(feature = "arrow-55"),
not(feature = "arrow-56")
not(feature = "arrow-56"),
not(feature = "arrow-57")
))]
compile_error!("Requested a feature that needs arrow without enabling arrow. Please enable the `arrow-55` or `arrow-56` feature");
compile_error!("Requested a feature that needs arrow without enabling arrow. Please enable the `arrow-55`, `arrow-56`, or `arrow-57` feature");
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit for this message if/when you remove arrow-55

Suggested change
compile_error!("Requested a feature that needs arrow without enabling arrow. Please enable the `arrow-55`, `arrow-56`, or `arrow-57` feature");
compile_error!("Requested a feature that needs arrow without enabling arrow. Please enable the `arrow-56` or `arrow-57` feature");


#[cfg(any(feature = "arrow-55", feature = "arrow-56"))]
#[cfg(any(feature = "arrow-55", feature = "arrow-56", feature = "arrow-57"))]
pub use arrow_compat_shims::*;

Check warning on line 40 in kernel/src/arrow_compat.rs

View workflow job for this annotation

GitHub Actions / test (ubuntu-latest)

unused import: `arrow_compat_shims::*`

Check warning on line 40 in kernel/src/arrow_compat.rs

View workflow job for this annotation

GitHub Actions / coverage

unused import: `arrow_compat_shims::*`
9 changes: 4 additions & 5 deletions kernel/src/checkpoint/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ use crate::action_reconciliation::{
use crate::actions::{Add, Metadata, Protocol, Remove};
use crate::arrow::array::{ArrayRef, StructArray};
use crate::arrow::datatypes::{DataType, Schema};
use crate::arrow::{
array::{create_array, RecordBatch},
datatypes::Field,
};
use crate::checkpoint::create_last_checkpoint_data;
use crate::engine::arrow_data::ArrowEngineData;
use crate::engine::default::{executor::tokio::TokioBackgroundExecutor, DefaultEngine};
Expand All @@ -14,11 +18,6 @@ use crate::schema::{DataType as KernelDataType, StructField, StructType};
use crate::utils::test_utils::Action;
use crate::{DeltaResult, FileMeta, LogPath, Snapshot};

use arrow_56::{
array::{create_array, RecordBatch},
datatypes::Field,
};

use object_store::{memory::InMemory, path::Path, ObjectStore};
use serde_json::{from_slice, json, Value};
use test_utils::delta_path_for_version;
Expand Down
2 changes: 1 addition & 1 deletion kernel/src/engine/ensure_data_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ mod tests {
&incorrect_variant_arrow_type(),
true,
),
"Invalid argument error: Incorrect datatype. Expected Struct(metadata Binary, value Binary), got Struct(field_1 Binary, field_2 Binary)",
"Invalid argument error: Incorrect datatype. Expected Struct(\"metadata\": Binary, \"value\": Binary), got Struct(\"field_1\": nullable Binary, \"field_2\": nullable Binary)",
)
}

Expand Down
1 change: 0 additions & 1 deletion mem-test/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ version.workspace = true
release = false

[dependencies]
arrow = "56"
delta_kernel = { path = "../kernel", features = ["arrow", "default-engine-rustls"] }
dhat = "0.3"
object_store = "0.12.3"
Expand Down
6 changes: 3 additions & 3 deletions mem-test/tests/dhat_large_table_data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use std::path::Path;
use std::sync::Arc;

use delta_kernel::arrow::array::{ArrayRef, Int64Array, StringArray};
use delta_kernel::arrow::compute::filter_record_batch;
use delta_kernel::arrow::record_batch::RecordBatch;
use delta_kernel::engine::arrow_data::ArrowEngineData;
use delta_kernel::engine::default::executor::tokio::TokioBackgroundExecutor;
Expand All @@ -16,7 +17,6 @@ use delta_kernel::parquet::arrow::ArrowWriter;
use delta_kernel::parquet::file::properties::WriterProperties;
use delta_kernel::Snapshot;

use arrow::compute::filter_record_batch;
use object_store::local::LocalFileSystem;
use serde_json::json;
use tempfile::tempdir;
Expand Down Expand Up @@ -46,9 +46,9 @@ fn write_large_parquet_to(path: &Path) -> Result<(), Box<dyn std::error::Error>>
let metadata = std::fs::metadata(&path)?;
let file_size = metadata.len();
let total_row_group_size: i64 = parquet_metadata
.row_groups
.row_groups()
.iter()
.map(|rg| rg.total_byte_size)
.map(|rg| rg.total_byte_size())
.sum();
println!("File size (compressed file size): {} bytes", file_size);
println!(
Expand Down
Loading