From 50c1a5bf6205c321e9ef49c730aeb25a772e8e38 Mon Sep 17 00:00:00 2001 From: Seoyoung Lee Date: Wed, 24 Sep 2025 18:09:24 +0900 Subject: [PATCH 1/4] refactor: Unify Reader/WriterFeature into a single TableFeature --- kernel/src/actions/crc.rs | 6 +- kernel/src/actions/mod.rs | 97 ++++--- kernel/src/actions/visitors.rs | 6 +- kernel/src/engine/arrow_data.rs | 6 +- kernel/src/schema/variant_utils.rs | 16 +- kernel/src/table_changes/log_replay/tests.rs | 8 +- kernel/src/table_changes/mod.rs | 6 +- kernel/src/table_configuration.rs | 86 +++--- kernel/src/table_features/column_mapping.rs | 11 +- kernel/src/table_features/mod.rs | 273 ++++++++----------- kernel/src/table_features/timestamp_ntz.rs | 12 +- 11 files changed, 239 insertions(+), 288 deletions(-) diff --git a/kernel/src/actions/crc.rs b/kernel/src/actions/crc.rs index e9265b180..655340e8c 100644 --- a/kernel/src/actions/crc.rs +++ b/kernel/src/actions/crc.rs @@ -150,7 +150,7 @@ mod tests { use crate::engine::sync::SyncEngine; use crate::schema::derive_macro_utils::ToDataType as _; use crate::schema::{ArrayType, DataType, StructField, StructType}; - use crate::table_features::{ReaderFeature, WriterFeature}; + use crate::table_features::TableFeature; use crate::utils::test_utils::string_array_to_engine_data; use crate::Engine; @@ -252,8 +252,8 @@ mod tests { let expected_protocol = Protocol { min_reader_version: 3, min_writer_version: 7, - reader_features: Some(vec![ReaderFeature::ColumnMapping]), - writer_features: Some(vec![WriterFeature::ColumnMapping]), + reader_features: Some(vec![TableFeature::ColumnMapping]), + writer_features: Some(vec![TableFeature::ColumnMapping]), }; let expected_metadata = Metadata { id: "testId".to_string(), diff --git a/kernel/src/actions/mod.rs b/kernel/src/actions/mod.rs index 0625fbd04..1864d7fe8 100644 --- a/kernel/src/actions/mod.rs +++ b/kernel/src/actions/mod.rs @@ -13,7 +13,7 @@ use crate::schema::{ ArrayType, DataType, MapType, SchemaRef, StructField, StructType, ToSchema as _, }; use crate::table_features::{ - ReaderFeature, WriterFeature, SUPPORTED_READER_FEATURES, SUPPORTED_WRITER_FEATURES, + FeatureType, TableFeature, SUPPORTED_READER_FEATURES, SUPPORTED_WRITER_FEATURES, }; use crate::table_properties::TableProperties; use crate::utils::require; @@ -348,11 +348,11 @@ pub(crate) struct Protocol { /// A collection of features that a client must implement in order to correctly /// read this table (exist only when minReaderVersion is set to 3) #[serde(skip_serializing_if = "Option::is_none")] - reader_features: Option>, + reader_features: Option>, /// A collection of features that a client must implement in order to correctly /// write this table (exist only when minWriterVersion is set to 7) #[serde(skip_serializing_if = "Option::is_none")] - writer_features: Option>, + writer_features: Option>, } fn parse_features(features: Option>) -> Option> @@ -429,26 +429,39 @@ impl Protocol { /// Get the reader features for the protocol #[internal_api] - pub(crate) fn reader_features(&self) -> Option<&[ReaderFeature]> { + pub(crate) fn reader_features(&self) -> Option<&[TableFeature]> { self.reader_features.as_deref() } /// Get the writer features for the protocol #[internal_api] - pub(crate) fn writer_features(&self) -> Option<&[WriterFeature]> { + pub(crate) fn writer_features(&self) -> Option<&[TableFeature]> { self.writer_features.as_deref() } /// True if this protocol has the requested reader feature - pub(crate) fn has_reader_feature(&self, feature: &ReaderFeature) -> bool { + /// Note: Use `has_table_feature` instead if given feature is supported for both read/write. + pub(crate) fn has_reader_feature(&self, feature: &TableFeature) -> bool { self.reader_features() .is_some_and(|features| features.contains(feature)) } - /// True if this protocol has the requested writer feature - pub(crate) fn has_writer_feature(&self, feature: &WriterFeature) -> bool { - self.writer_features() - .is_some_and(|features| features.contains(feature)) + /// True if this protocol has the requested table feature + /// - ReaderWriter features must be in both reader features and writer features. + /// - Writer features only need to be in writer features. + pub(crate) fn has_table_feature(&self, feature: &TableFeature) -> bool { + match feature.feature_type() { + FeatureType::ReaderWriter => { + self.reader_features() + .is_some_and(|features| features.contains(feature)) + && self + .writer_features() + .is_some_and(|features| features.contains(feature)) + } + FeatureType::Writer => self + .writer_features() + .is_some_and(|features| features.contains(feature)), + } } /// Check if reading a table with this protocol is supported. That is: does the kernel support @@ -496,8 +509,8 @@ impl Protocol { ensure_supported_features(writer_features, &SUPPORTED_WRITER_FEATURES)?; // ensure that there is no illegal combination of features - if writer_features.contains(&WriterFeature::RowTracking) - && !writer_features.contains(&WriterFeature::DomainMetadata) + if writer_features.contains(&TableFeature::RowTracking) + && !writer_features.contains(&TableFeature::DomainMetadata) { Err(Error::invalid_protocol( "rowTracking feature requires domainMetadata to also be enabled", @@ -593,7 +606,7 @@ where return Ok(()); } - // we get the type name (ReaderFeature/WriterFeature) for better error messages + // we get the type name (TableFeature) for better error messages let features_type = std::any::type_name::() .rsplit("::") .next() @@ -1291,8 +1304,8 @@ mod tests { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeature::V2Checkpoint]), - Some([ReaderFeature::V2Checkpoint]), + Some([TableFeature::V2Checkpoint]), + Some([TableFeature::V2Checkpoint]), ) .unwrap(); assert!(protocol.ensure_read_supported().is_ok()); @@ -1312,7 +1325,7 @@ mod tests { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeature::V2Checkpoint]), + Some([TableFeature::V2Checkpoint]), Some(&empty_features), ) .unwrap(); @@ -1322,7 +1335,7 @@ mod tests { 3, 7, Some(&empty_features), - Some([WriterFeature::V2Checkpoint]), + Some([TableFeature::V2Checkpoint]), ) .unwrap(); assert!(protocol.ensure_read_supported().is_ok()); @@ -1330,8 +1343,8 @@ mod tests { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeature::V2Checkpoint]), - Some([WriterFeature::V2Checkpoint]), + Some([TableFeature::V2Checkpoint]), + Some([TableFeature::V2Checkpoint]), ) .unwrap(); assert!(protocol.ensure_read_supported().is_ok()); @@ -1360,11 +1373,11 @@ mod tests { 7, Some::>(vec![]), Some(vec![ - WriterFeature::AppendOnly, - WriterFeature::DeletionVectors, - WriterFeature::DomainMetadata, - WriterFeature::Invariants, - WriterFeature::RowTracking, + TableFeature::AppendOnly, + TableFeature::DeletionVectors, + TableFeature::DomainMetadata, + TableFeature::Invariants, + TableFeature::RowTracking, ]), ) .unwrap(); @@ -1375,26 +1388,26 @@ mod tests { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeature::Unknown("unsupported reader".to_string())]), - Some([WriterFeature::IdentityColumns]), + Some([TableFeature::Unknown("unsupported reader".to_string())]), + Some([TableFeature::IdentityColumns]), ) .unwrap(); assert_result_error_with_message( protocol.ensure_write_supported(), - r#"Unsupported: Unknown WriterFeatures: "identityColumns". Supported WriterFeatures: "appendOnly", "deletionVectors", "domainMetadata", "invariants", "rowTracking", "timestampNtz", "variantType", "variantType-preview", "variantShredding-preview""#, + r#"Unsupported: Unknown TableFeatures: "identityColumns". Supported TableFeatures: "appendOnly", "deletionVectors", "domainMetadata", "invariants", "rowTracking", "timestampNtz", "variantType", "variantType-preview", "variantShredding-preview""#, ); // Unknown writer features should cause an error let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeature::Unknown("unsupported reader".to_string())]), - Some([WriterFeature::Unknown("unsupported writer".to_string())]), + Some([TableFeature::Unknown("unsupported reader".to_string())]), + Some([TableFeature::Unknown("unsupported writer".to_string())]), ) .unwrap(); assert_result_error_with_message( protocol.ensure_write_supported(), - r#"Unsupported: Unknown WriterFeatures: "unsupported writer". Supported WriterFeatures: "appendOnly", "deletionVectors", "domainMetadata", "invariants", "rowTracking", "timestampNtz", "variantType", "variantType-preview", "variantShredding-preview""#, + r#"Unsupported: Unknown TableFeatures: "unsupported writer". Supported TableFeatures: "appendOnly", "deletionVectors", "domainMetadata", "invariants", "rowTracking", "timestampNtz", "variantType", "variantType-preview", "variantShredding-preview""#, ); } @@ -1406,7 +1419,7 @@ mod tests { Some::>(vec![]), Some(vec![ // No domain metadata even though that is required - WriterFeature::RowTracking, + TableFeature::RowTracking, ]), ) .unwrap(); @@ -1419,16 +1432,16 @@ mod tests { #[test] fn test_ensure_supported_features() { - let supported_features = [ReaderFeature::ColumnMapping, ReaderFeature::DeletionVectors]; - let table_features = vec![ReaderFeature::ColumnMapping]; + let supported_features = [TableFeature::ColumnMapping, TableFeature::DeletionVectors]; + let table_features = vec![TableFeature::ColumnMapping]; ensure_supported_features(&table_features, &supported_features).unwrap(); // test unknown features - let table_features = vec![ReaderFeature::ColumnMapping, ReaderFeature::unknown("idk")]; + let table_features = vec![TableFeature::ColumnMapping, TableFeature::unknown("idk")]; let error = ensure_supported_features(&table_features, &supported_features).unwrap_err(); match error { Error::Unsupported(e) if e == - "Unknown ReaderFeatures: \"idk\". Supported ReaderFeatures: \"columnMapping\", \"deletionVectors\"" + "Unknown TableFeatures: \"idk\". Supported TableFeatures: \"columnMapping\", \"deletionVectors\"" => {}, _ => panic!("Expected unsupported error, got: {error}"), } @@ -1438,16 +1451,16 @@ mod tests { fn test_parse_table_feature_never_fails() { // parse a non-str let features = Some([5]); - let expected = Some(vec![ReaderFeature::unknown("5")]); - assert_eq!(parse_features::(features), expected); + let expected = Some(vec![TableFeature::unknown("5")]); + assert_eq!(parse_features::(features), expected); // weird strs let features = Some(["", "absurD_)(+13%^⚙️"]); let expected = Some(vec![ - ReaderFeature::unknown(""), - ReaderFeature::unknown("absurD_)(+13%^⚙️"), + TableFeature::unknown(""), + TableFeature::unknown("absurD_)(+13%^⚙️"), ]); - assert_eq!(parse_features::(features), expected); + assert_eq!(parse_features::(features), expected); } #[test] @@ -1808,8 +1821,8 @@ mod tests { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeature::ColumnMapping]), - Some([WriterFeature::DeletionVectors]), + Some([TableFeature::ColumnMapping]), + Some([TableFeature::DeletionVectors]), ) .unwrap(); diff --git a/kernel/src/actions/visitors.rs b/kernel/src/actions/visitors.rs index f10ecf1de..f676190eb 100644 --- a/kernel/src/actions/visitors.rs +++ b/kernel/src/actions/visitors.rs @@ -685,7 +685,7 @@ mod tests { use crate::engine::sync::SyncEngine; use crate::expressions::{column_expr_ref, Expression}; - use crate::table_features::{ReaderFeature, WriterFeature}; + use crate::table_features::TableFeature; use crate::utils::test_utils::{action_batch, parse_json_batch}; use crate::Engine; @@ -696,8 +696,8 @@ mod tests { let expected = Protocol { min_reader_version: 3, min_writer_version: 7, - reader_features: Some(vec![ReaderFeature::DeletionVectors]), - writer_features: Some(vec![WriterFeature::DeletionVectors]), + reader_features: Some(vec![TableFeature::DeletionVectors]), + writer_features: Some(vec![TableFeature::DeletionVectors]), }; assert_eq!(parsed, expected); Ok(()) diff --git a/kernel/src/engine/arrow_data.rs b/kernel/src/engine/arrow_data.rs index b6a48e581..a45b6109d 100644 --- a/kernel/src/engine/arrow_data.rs +++ b/kernel/src/engine/arrow_data.rs @@ -352,7 +352,7 @@ mod tests { use crate::engine::sync::SyncEngine; use crate::expressions::ArrayData; use crate::schema::{ArrayType, DataType, StructField, StructType}; - use crate::table_features::{ReaderFeature, WriterFeature}; + use crate::table_features::TableFeature; use crate::utils::test_utils::{assert_result_error_with_message, string_array_to_engine_data}; use crate::{DeltaResult, Engine as _, EngineData as _}; @@ -394,11 +394,11 @@ mod tests { assert_eq!(protocol.min_writer_version(), 7); assert_eq!( protocol.reader_features(), - Some([ReaderFeature::unknown("rw1")].as_slice()) + Some([TableFeature::unknown("rw1")].as_slice()) ); assert_eq!( protocol.writer_features(), - Some([WriterFeature::unknown("rw1"), WriterFeature::unknown("w2")].as_slice()) + Some([TableFeature::unknown("rw1"), TableFeature::unknown("w2")].as_slice()) ); Ok(()) } diff --git a/kernel/src/schema/variant_utils.rs b/kernel/src/schema/variant_utils.rs index c9bb88bd8..454a32f64 100644 --- a/kernel/src/schema/variant_utils.rs +++ b/kernel/src/schema/variant_utils.rs @@ -2,7 +2,7 @@ use crate::actions::Protocol; use crate::schema::{Schema, SchemaTransform, StructType}; -use crate::table_features::{ReaderFeature, WriterFeature}; +use crate::table_features::TableFeature; use crate::utils::require; use crate::{DeltaResult, Error}; use std::borrow::Cow; @@ -24,10 +24,8 @@ pub(crate) fn validate_variant_type_feature_support( ) -> DeltaResult<()> { // Both the reader and writer need to have either the VariantType or the VariantTypePreview // features. - if (!protocol.has_reader_feature(&ReaderFeature::VariantType) - && !protocol.has_reader_feature(&ReaderFeature::VariantTypePreview)) - || (!protocol.has_writer_feature(&WriterFeature::VariantType) - && !protocol.has_writer_feature(&WriterFeature::VariantTypePreview)) + if !protocol.has_table_feature(&TableFeature::VariantType) + && !protocol.has_table_feature(&TableFeature::VariantTypePreview) { let mut uses_variant = UsesVariant::default(); let _ = uses_variant.transform_struct(schema); @@ -46,7 +44,7 @@ mod tests { use super::*; use crate::actions::Protocol; use crate::schema::{DataType, StructField, StructType}; - use crate::table_features::{ReaderFeature, WriterFeature}; + use crate::table_features::TableFeature; use crate::utils::test_utils::assert_result_error_with_message; #[test] @@ -74,10 +72,10 @@ mod tests { #[test] fn test_variant_feature_validation() { let features = [ - (ReaderFeature::VariantType, WriterFeature::VariantType), + (TableFeature::VariantType, TableFeature::VariantType), ( - ReaderFeature::VariantTypePreview, - WriterFeature::VariantTypePreview, + TableFeature::VariantTypePreview, + TableFeature::VariantTypePreview, ), ]; let schema_with_variant = StructType::new_unchecked([ diff --git a/kernel/src/table_changes/log_replay/tests.rs b/kernel/src/table_changes/log_replay/tests.rs index 0b8aef0ff..d87f7aaf7 100644 --- a/kernel/src/table_changes/log_replay/tests.rs +++ b/kernel/src/table_changes/log_replay/tests.rs @@ -10,7 +10,7 @@ use crate::scan::state::DvInfo; use crate::scan::PhysicalPredicate; use crate::schema::{DataType, StructField, StructType}; use crate::table_changes::log_replay::LogReplayScanner; -use crate::table_features::ReaderFeature; +use crate::table_features::TableFeature; use crate::utils::test_utils::{assert_result_error_with_message, Action, LocalMockTable}; use crate::Predicate; use crate::{DeltaResult, Engine, Error, Version}; @@ -79,8 +79,8 @@ async fn metadata_protocol() { Protocol::try_new( 3, 7, - Some([ReaderFeature::DeletionVectors]), - Some([ReaderFeature::ColumnMapping]), + Some([TableFeature::DeletionVectors]), + Some([TableFeature::ColumnMapping]), ) .unwrap(), ), @@ -138,7 +138,7 @@ async fn unsupported_reader_feature() { Protocol::try_new( 3, 7, - Some([ReaderFeature::DeletionVectors, ReaderFeature::ColumnMapping]), + Some([TableFeature::DeletionVectors, TableFeature::ColumnMapping]), Some([""; 0]), ) .unwrap(), diff --git a/kernel/src/table_changes/mod.rs b/kernel/src/table_changes/mod.rs index 73b82b66b..624c1115c 100644 --- a/kernel/src/table_changes/mod.rs +++ b/kernel/src/table_changes/mod.rs @@ -41,7 +41,7 @@ use crate::log_segment::LogSegment; use crate::path::AsUrl; use crate::schema::{DataType, Schema, StructField, StructType}; use crate::snapshot::{Snapshot, SnapshotRef}; -use crate::table_features::{ColumnMappingMode, ReaderFeature}; +use crate::table_features::{ColumnMappingMode, TableFeature}; use crate::table_properties::TableProperties; use crate::utils::require; use crate::{DeltaResult, Engine, Error, Version}; @@ -270,8 +270,8 @@ fn check_cdf_table_properties(table_properties: &TableProperties) -> DeltaResult /// Ensures that Change Data Feed is supported for a table with this [`Protocol`] . /// See the documentation of [`TableChanges`] for more details. fn ensure_cdf_read_supported(protocol: &Protocol) -> DeltaResult<()> { - static CDF_SUPPORTED_READER_FEATURES: LazyLock> = - LazyLock::new(|| vec![ReaderFeature::DeletionVectors]); + static CDF_SUPPORTED_READER_FEATURES: LazyLock> = + LazyLock::new(|| vec![TableFeature::DeletionVectors]); match &protocol.reader_features() { // if min_reader_version = 3 and all reader features are subset of supported => OK Some(reader_features) if protocol.min_reader_version() == 3 => { diff --git a/kernel/src/table_configuration.rs b/kernel/src/table_configuration.rs index b5586d0e4..149582d1e 100644 --- a/kernel/src/table_configuration.rs +++ b/kernel/src/table_configuration.rs @@ -17,7 +17,7 @@ use crate::schema::variant_utils::validate_variant_type_feature_support; use crate::schema::{InvariantChecker, SchemaRef}; use crate::table_features::{ column_mapping_mode, validate_schema_column_mapping, validate_timestamp_ntz_feature_support, - ColumnMappingMode, ReaderFeature, WriterFeature, + ColumnMappingMode, TableFeature, }; use crate::table_properties::TableProperties; use crate::{DeltaResult, Error, Version}; @@ -208,8 +208,8 @@ impl TableConfiguration { /// [`TableChanges`]: crate::table_changes::TableChanges #[internal_api] pub(crate) fn is_cdf_read_supported(&self) -> bool { - static CDF_SUPPORTED_READER_FEATURES: LazyLock> = - LazyLock::new(|| vec![ReaderFeature::DeletionVectors]); + static CDF_SUPPORTED_READER_FEATURES: LazyLock> = + LazyLock::new(|| vec![TableFeature::DeletionVectors]); let protocol_supported = match self.protocol.reader_features() { // if min_reader_version = 3 and all reader features are subset of supported => OK Some(reader_features) if self.protocol.min_reader_version() == 3 => { @@ -239,15 +239,10 @@ impl TableConfiguration { #[internal_api] #[allow(unused)] // needed to compile w/o default features pub(crate) fn is_deletion_vector_supported(&self) -> bool { - let read_supported = self - .protocol() - .has_reader_feature(&ReaderFeature::DeletionVectors) - && self.protocol.min_reader_version() == 3; - let write_supported = self - .protocol() - .has_writer_feature(&WriterFeature::DeletionVectors) - && self.protocol.min_writer_version() == 7; - read_supported && write_supported + self.protocol() + .has_table_feature(&TableFeature::DeletionVectors) + && self.protocol.min_reader_version() == 3 + && self.protocol.min_writer_version() == 7 } /// Returns `true` if writing deletion vectors is enabled for this table. This is the case @@ -267,12 +262,12 @@ impl TableConfiguration { /// Returns `true` if the table supports the appendOnly table feature. To support this feature: /// - The table must have a writer version between 2 and 7 (inclusive) - /// - If the table is on writer version 7, it must have the [`WriterFeature::AppendOnly`] + /// - If the table is on writer version 7, it must have the [`TableFeature::AppendOnly`] /// writer feature. pub(crate) fn is_append_only_supported(&self) -> bool { let protocol = &self.protocol; match protocol.min_writer_version() { - 7 if protocol.has_writer_feature(&WriterFeature::AppendOnly) => true, + 7 if protocol.has_table_feature(&TableFeature::AppendOnly) => true, version => (2..=6).contains(&version), } } @@ -286,7 +281,7 @@ impl TableConfiguration { pub(crate) fn is_invariants_supported(&self) -> bool { let protocol = &self.protocol; match protocol.min_writer_version() { - 7 if protocol.has_writer_feature(&WriterFeature::Invariants) => true, + 7 if protocol.has_table_feature(&TableFeature::Invariants) => true, version => (2..=6).contains(&version), } } @@ -297,26 +292,21 @@ impl TableConfiguration { /// /// See: pub(crate) fn is_v2_checkpoint_write_supported(&self) -> bool { - let read_supported = self - .protocol() - .has_reader_feature(&ReaderFeature::V2Checkpoint); - let write_supported = self - .protocol() - .has_writer_feature(&WriterFeature::V2Checkpoint); - read_supported && write_supported + self.protocol() + .has_table_feature(&TableFeature::V2Checkpoint) } /// Returns `true` if the table supports writing in-commit timestamps. /// /// To support this feature the table must: /// - Have a min_writer_version of 7 - /// - Have the [`WriterFeature::InCommitTimestamp`] writer feature. + /// - Have the [`TableFeature::InCommitTimestamp`] writer feature. #[allow(unused)] pub(crate) fn is_in_commit_timestamps_supported(&self) -> bool { self.protocol().min_writer_version() == 7 && self .protocol() - .has_writer_feature(&WriterFeature::InCommitTimestamp) + .has_table_feature(&TableFeature::InCommitTimestamp) } /// Returns `true` if in-commit timestamps is supported and it is enabled. In-commit timestamps @@ -369,25 +359,25 @@ impl TableConfiguration { /// /// To support this feature the table must: /// - Have a min_writer_version of 7. - /// - Have the [`WriterFeature::DomainMetadata`] writer feature. + /// - Have the [`TableFeature::DomainMetadata`] writer feature. #[allow(unused)] pub(crate) fn is_domain_metadata_supported(&self) -> bool { self.protocol().min_writer_version() == 7 && self .protocol() - .has_writer_feature(&WriterFeature::DomainMetadata) + .has_table_feature(&TableFeature::DomainMetadata) } /// Returns `true` if the table supports writing row tracking metadata. /// /// To support this feature the table must: /// - Have a min_writer_version of 7. - /// - Have the [`WriterFeature::RowTracking`] writer feature. + /// - Have the [`TableFeature::RowTracking`] writer feature. pub(crate) fn is_row_tracking_supported(&self) -> bool { self.protocol().min_writer_version() == 7 && self .protocol() - .has_writer_feature(&WriterFeature::RowTracking) + .has_table_feature(&TableFeature::RowTracking) } /// Returns `true` if row tracking is enabled for this table. @@ -433,7 +423,7 @@ mod test { use crate::actions::{Metadata, Protocol}; use crate::schema::{DataType, StructField, StructType}; - use crate::table_features::{ReaderFeature, WriterFeature}; + use crate::table_features::TableFeature; use crate::table_properties::TableProperties; use crate::utils::test_utils::assert_result_error_with_message; use crate::Error; @@ -455,8 +445,8 @@ mod test { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeature::DeletionVectors]), - Some([WriterFeature::DeletionVectors]), + Some([TableFeature::DeletionVectors]), + Some([TableFeature::DeletionVectors]), ) .unwrap(); let table_root = Url::try_from("file:///").unwrap(); @@ -485,8 +475,8 @@ mod test { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeature::DeletionVectors]), - Some([WriterFeature::DeletionVectors]), + Some([TableFeature::DeletionVectors]), + Some([TableFeature::DeletionVectors]), ) .unwrap(); let table_root = Url::try_from("file:///").unwrap(); @@ -513,7 +503,7 @@ mod test { 3, 7, Some::>(vec![]), - Some([WriterFeature::InCommitTimestamp]), + Some([TableFeature::InCommitTimestamp]), ) .unwrap(); let table_root = Url::try_from("file:///").unwrap(); @@ -598,7 +588,7 @@ mod test { 3, 7, Some::>(vec![]), - Some([WriterFeature::InCommitTimestamp]), + Some([TableFeature::InCommitTimestamp]), ) .unwrap(); let table_root = Url::try_from("file:///").unwrap(); @@ -618,7 +608,7 @@ mod test { 3, 7, Some::>(vec![]), - Some([WriterFeature::InCommitTimestamp]), + Some([TableFeature::InCommitTimestamp]), ) .unwrap(); let table_root = Url::try_from("file:///").unwrap(); @@ -652,8 +642,8 @@ mod test { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeature::TimestampWithoutTimezone]), - Some([WriterFeature::TimestampWithoutTimezone]), + Some([TableFeature::TimestampWithoutTimezone]), + Some([TableFeature::TimestampWithoutTimezone]), ) .unwrap(); let table_root = Url::try_from("file:///").unwrap(); @@ -677,8 +667,8 @@ mod test { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeature::DeletionVectors]), - Some([WriterFeature::DeletionVectors]), + Some([TableFeature::DeletionVectors]), + Some([TableFeature::DeletionVectors]), ) .unwrap(); let table_root = Url::try_from("file:///").unwrap(); @@ -707,11 +697,11 @@ mod test { let new_protocol = Protocol::try_new( 3, 7, - Some([ReaderFeature::DeletionVectors, ReaderFeature::V2Checkpoint]), + Some([TableFeature::DeletionVectors, TableFeature::V2Checkpoint]), Some([ - WriterFeature::DeletionVectors, - WriterFeature::V2Checkpoint, - WriterFeature::AppendOnly, + TableFeature::DeletionVectors, + TableFeature::V2Checkpoint, + TableFeature::AppendOnly, ]), ) .unwrap(); @@ -761,8 +751,8 @@ mod test { let protocol_with_timestamp_ntz_features = Protocol::try_new( 3, 7, - Some([ReaderFeature::TimestampWithoutTimezone]), - Some([WriterFeature::TimestampWithoutTimezone]), + Some([TableFeature::TimestampWithoutTimezone]), + Some([TableFeature::TimestampWithoutTimezone]), ) .unwrap(); @@ -806,8 +796,8 @@ mod test { let protocol_with_variant_features = Protocol::try_new( 3, 7, - Some([ReaderFeature::VariantType]), - Some([WriterFeature::VariantType]), + Some([TableFeature::VariantType]), + Some([TableFeature::VariantType]), ) .unwrap(); diff --git a/kernel/src/table_features/column_mapping.rs b/kernel/src/table_features/column_mapping.rs index cf92d6952..3d11c0520 100644 --- a/kernel/src/table_features/column_mapping.rs +++ b/kernel/src/table_features/column_mapping.rs @@ -1,5 +1,5 @@ //! Code to handle column mapping, including modes and schema transforms -use super::ReaderFeature; +use super::TableFeature; use crate::actions::Protocol; use crate::schema::{ ColumnName, DataType, MetadataValue, Schema, SchemaTransform, StructField, StructType, @@ -38,7 +38,8 @@ pub(crate) fn column_mapping_mode( // (but should be ignored) even when the feature is not supported. For details see // https://github.com/delta-io/delta/blob/master/PROTOCOL.md#column-mapping (Some(mode), 2) => mode, - (Some(mode), 3) if protocol.has_reader_feature(&ReaderFeature::ColumnMapping) => mode, + // TODO: replace with `has_table_feature` whenever column mapping for write is supported + (Some(mode), 3) if protocol.has_reader_feature(&TableFeature::ColumnMapping) => mode, _ => ColumnMappingMode::None, } } @@ -204,7 +205,7 @@ mod tests { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeature::ColumnMapping]), + Some([TableFeature::ColumnMapping]), empty_features.clone(), ) .unwrap(); @@ -222,7 +223,7 @@ mod tests { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeature::DeletionVectors]), + Some([TableFeature::DeletionVectors]), empty_features.clone(), ) .unwrap(); @@ -240,7 +241,7 @@ mod tests { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeature::DeletionVectors, ReaderFeature::ColumnMapping]), + Some([TableFeature::DeletionVectors, TableFeature::ColumnMapping]), empty_features, ) .unwrap(); diff --git a/kernel/src/table_features/mod.rs b/kernel/src/table_features/mod.rs index 10ebed0d9..eb566202a 100644 --- a/kernel/src/table_features/mod.rs +++ b/kernel/src/table_features/mod.rs @@ -14,71 +14,16 @@ pub(crate) use timestamp_ntz::validate_timestamp_ntz_feature_support; mod column_mapping; mod timestamp_ntz; -/// Reader features communicate capabilities that must be implemented in order to correctly read a -/// given table. That is, readers must implement and respect all features listed in a table's -/// `ReaderFeatures`. Note that any feature listed as a `ReaderFeature` must also have a -/// corresponding `WriterFeature`. +/// Table features represent protocol capabilities required to correctly read or write a given table. +/// - Readers must implement all features required for correct table reads. +/// - Writers must implement all features required for correct table writes. /// -/// The kernel currently supports all reader features except for V2Checkpoints. -#[derive( - Serialize, - Deserialize, - Debug, - Clone, - Eq, - PartialEq, - EnumString, - StrumDisplay, - AsRefStr, - EnumCount, - Hash, -)] -#[strum(serialize_all = "camelCase")] -#[serde(rename_all = "camelCase")] -#[internal_api] -pub(crate) enum ReaderFeature { - /// CatalogManaged tables: - /// - CatalogManaged, - #[strum(serialize = "catalogOwned-preview")] - #[serde(rename = "catalogOwned-preview")] - CatalogOwnedPreview, - /// Mapping of one column to another - ColumnMapping, - /// Deletion vectors for merge, update, delete - DeletionVectors, - /// timestamps without timezone support - #[strum(serialize = "timestampNtz")] - #[serde(rename = "timestampNtz")] - TimestampWithoutTimezone, - // Allow columns to change type - TypeWidening, - #[strum(serialize = "typeWidening-preview")] - #[serde(rename = "typeWidening-preview")] - TypeWideningPreview, - /// version 2 of checkpointing - V2Checkpoint, - /// vacuumProtocolCheck ReaderWriter feature ensures consistent application of reader and writer - /// protocol checks during VACUUM operations - VacuumProtocolCheck, - /// This feature enables support for the variant data type, which stores semi-structured data. - VariantType, - #[strum(serialize = "variantType-preview")] - #[serde(rename = "variantType-preview")] - VariantTypePreview, - #[strum(serialize = "variantShredding-preview")] - #[serde(rename = "variantShredding-preview")] - VariantShreddingPreview, - #[serde(untagged)] - #[strum(default)] - Unknown(String), -} - -/// Similar to reader features, writer features communicate capabilities that must be implemented -/// in order to correctly write to a given table. That is, writers must implement and respect all -/// features listed in a table's `WriterFeatures`. +/// Each variant corresponds to one such feature. A feature is either: +/// - **ReaderWriter** (must be supported by both readers and writers), or +/// - **Writer only** (applies only to writers). +/// There are no Reader only features. See [`TableFeature::feature_type`] for the category of each. /// -/// Kernel write support is currently in progress and as such these are not supported. +/// The kernel currently supports all reader features except `V2Checkpoints`. #[derive( Serialize, Deserialize, @@ -95,7 +40,7 @@ pub(crate) enum ReaderFeature { #[strum(serialize_all = "camelCase")] #[serde(rename_all = "camelCase")] #[internal_api] -pub(crate) enum WriterFeature { +pub(crate) enum TableFeature { /// CatalogManaged tables: /// CatalogManaged, @@ -160,65 +105,73 @@ pub(crate) enum WriterFeature { Unknown(String), } -impl ToDataType for ReaderFeature { - fn to_data_type() -> DataType { - DataType::STRING - } +#[derive(Eq, PartialEq)] +pub(crate) enum FeatureType { + Writer, + ReaderWriter, } -impl ToDataType for WriterFeature { - fn to_data_type() -> DataType { - DataType::STRING +impl TableFeature { + pub(crate) fn feature_type(&self) -> FeatureType { + match self { + TableFeature::CatalogManaged + | TableFeature::CatalogOwnedPreview + | TableFeature::ColumnMapping + | TableFeature::DeletionVectors + | TableFeature::TimestampWithoutTimezone + | TableFeature::TypeWidening + | TableFeature::TypeWideningPreview + | TableFeature::V2Checkpoint + | TableFeature::VacuumProtocolCheck + | TableFeature::VariantType + | TableFeature::VariantTypePreview + | TableFeature::VariantShreddingPreview + | TableFeature::Unknown(_) => FeatureType::ReaderWriter, + _ => FeatureType::Writer, + } } } -impl From for Scalar { - fn from(feature: ReaderFeature) -> Self { - Scalar::String(feature.to_string()) +impl ToDataType for TableFeature { + fn to_data_type() -> DataType { + DataType::STRING } } -impl From for Scalar { - fn from(feature: WriterFeature) -> Self { +impl From for Scalar { + fn from(feature: TableFeature) -> Self { Scalar::String(feature.to_string()) } } #[cfg(test)] // currently only used in tests -impl ReaderFeature { - pub(crate) fn unknown(s: impl ToString) -> Self { - ReaderFeature::Unknown(s.to_string()) - } -} - -#[cfg(test)] // currently only used in tests -impl WriterFeature { +impl TableFeature { pub(crate) fn unknown(s: impl ToString) -> Self { - WriterFeature::Unknown(s.to_string()) + TableFeature::Unknown(s.to_string()) } } -pub(crate) static SUPPORTED_READER_FEATURES: LazyLock> = LazyLock::new(|| { +pub(crate) static SUPPORTED_READER_FEATURES: LazyLock> = LazyLock::new(|| { vec![ #[cfg(feature = "catalog-managed")] - ReaderFeature::CatalogManaged, + TableFeature::CatalogManaged, #[cfg(feature = "catalog-managed")] - ReaderFeature::CatalogOwnedPreview, - ReaderFeature::ColumnMapping, - ReaderFeature::DeletionVectors, - ReaderFeature::TimestampWithoutTimezone, - ReaderFeature::TypeWidening, - ReaderFeature::TypeWideningPreview, - ReaderFeature::VacuumProtocolCheck, - ReaderFeature::V2Checkpoint, - ReaderFeature::VariantType, - ReaderFeature::VariantTypePreview, + TableFeature::CatalogOwnedPreview, + TableFeature::ColumnMapping, + TableFeature::DeletionVectors, + TableFeature::TimestampWithoutTimezone, + TableFeature::TypeWidening, + TableFeature::TypeWideningPreview, + TableFeature::VacuumProtocolCheck, + TableFeature::V2Checkpoint, + TableFeature::VariantType, + TableFeature::VariantTypePreview, // The default engine currently DOES NOT support shredded Variant reads and the parquet // reader will reject the read if it sees a shredded schema in the parquet file. That being // said, kernel does permit reconstructing shredded variants into the // `STRUCT` representation if parquet readers of // third-party engines support it. - ReaderFeature::VariantShreddingPreview, + TableFeature::VariantShreddingPreview, ] }); @@ -228,17 +181,17 @@ pub(crate) static SUPPORTED_READER_FEATURES: LazyLock> = Lazy /// - We only support DeletionVectors in that we never write them (no DML). /// - We support writing to existing tables with row tracking, but we don't support creating /// tables with row tracking yet. -pub(crate) static SUPPORTED_WRITER_FEATURES: LazyLock> = LazyLock::new(|| { +pub(crate) static SUPPORTED_WRITER_FEATURES: LazyLock> = LazyLock::new(|| { vec![ - WriterFeature::AppendOnly, - WriterFeature::DeletionVectors, - WriterFeature::DomainMetadata, - WriterFeature::Invariants, - WriterFeature::RowTracking, - WriterFeature::TimestampWithoutTimezone, - WriterFeature::VariantType, - WriterFeature::VariantTypePreview, - WriterFeature::VariantShreddingPreview, + TableFeature::AppendOnly, + TableFeature::DeletionVectors, + TableFeature::DomainMetadata, + TableFeature::Invariants, + TableFeature::RowTracking, + TableFeature::TimestampWithoutTimezone, + TableFeature::VariantType, + TableFeature::VariantTypePreview, + TableFeature::VariantShreddingPreview, ] }); @@ -249,14 +202,14 @@ mod tests { #[test] fn test_unknown_features() { let mixed_reader = &[ - ReaderFeature::DeletionVectors, - ReaderFeature::unknown("cool_feature"), - ReaderFeature::ColumnMapping, + TableFeature::DeletionVectors, + TableFeature::unknown("cool_feature"), + TableFeature::ColumnMapping, ]; let mixed_writer = &[ - WriterFeature::DeletionVectors, - WriterFeature::unknown("cool_feature"), - WriterFeature::AppendOnly, + TableFeature::DeletionVectors, + TableFeature::unknown("cool_feature"), + TableFeature::AppendOnly, ]; let reader_string = serde_json::to_string(mixed_reader).unwrap(); @@ -271,8 +224,8 @@ mod tests { "[\"deletionVectors\",\"cool_feature\",\"appendOnly\"]" ); - let typed_reader: Vec = serde_json::from_str(&reader_string).unwrap(); - let typed_writer: Vec = serde_json::from_str(&writer_string).unwrap(); + let typed_reader: Vec = serde_json::from_str(&reader_string).unwrap(); + let typed_writer: Vec = serde_json::from_str(&writer_string).unwrap(); assert_eq!(typed_reader.len(), 3); assert_eq!(&typed_reader, mixed_reader); @@ -283,35 +236,33 @@ mod tests { #[test] fn test_roundtrip_reader_features() { let cases = [ - (ReaderFeature::CatalogManaged, "catalogManaged"), - (ReaderFeature::CatalogOwnedPreview, "catalogOwned-preview"), - (ReaderFeature::ColumnMapping, "columnMapping"), - (ReaderFeature::DeletionVectors, "deletionVectors"), - (ReaderFeature::TimestampWithoutTimezone, "timestampNtz"), - (ReaderFeature::TypeWidening, "typeWidening"), - (ReaderFeature::TypeWideningPreview, "typeWidening-preview"), - (ReaderFeature::V2Checkpoint, "v2Checkpoint"), - (ReaderFeature::VacuumProtocolCheck, "vacuumProtocolCheck"), - (ReaderFeature::VariantType, "variantType"), - (ReaderFeature::VariantTypePreview, "variantType-preview"), + (TableFeature::CatalogManaged, "catalogManaged"), + (TableFeature::CatalogOwnedPreview, "catalogOwned-preview"), + (TableFeature::ColumnMapping, "columnMapping"), + (TableFeature::DeletionVectors, "deletionVectors"), + (TableFeature::TimestampWithoutTimezone, "timestampNtz"), + (TableFeature::TypeWidening, "typeWidening"), + (TableFeature::TypeWideningPreview, "typeWidening-preview"), + (TableFeature::V2Checkpoint, "v2Checkpoint"), + (TableFeature::VacuumProtocolCheck, "vacuumProtocolCheck"), + (TableFeature::VariantType, "variantType"), + (TableFeature::VariantTypePreview, "variantType-preview"), ( - ReaderFeature::VariantShreddingPreview, + TableFeature::VariantShreddingPreview, "variantShredding-preview", ), - (ReaderFeature::unknown("something"), "something"), + (TableFeature::unknown("something"), "something"), ]; - assert_eq!(ReaderFeature::COUNT, cases.len()); - for (feature, expected) in cases { assert_eq!(feature.to_string(), expected); let serialized = serde_json::to_string(&feature).unwrap(); assert_eq!(serialized, format!("\"{expected}\"")); - let deserialized: ReaderFeature = serde_json::from_str(&serialized).unwrap(); + let deserialized: TableFeature = serde_json::from_str(&serialized).unwrap(); assert_eq!(deserialized, feature); - let from_str: ReaderFeature = expected.parse().unwrap(); + let from_str: TableFeature = expected.parse().unwrap(); assert_eq!(from_str, feature); } } @@ -319,47 +270,47 @@ mod tests { #[test] fn test_roundtrip_writer_features() { let cases = [ - (WriterFeature::AppendOnly, "appendOnly"), - (WriterFeature::CatalogManaged, "catalogManaged"), - (WriterFeature::CatalogOwnedPreview, "catalogOwned-preview"), - (WriterFeature::Invariants, "invariants"), - (WriterFeature::CheckConstraints, "checkConstraints"), - (WriterFeature::ChangeDataFeed, "changeDataFeed"), - (WriterFeature::GeneratedColumns, "generatedColumns"), - (WriterFeature::ColumnMapping, "columnMapping"), - (WriterFeature::IdentityColumns, "identityColumns"), - (WriterFeature::InCommitTimestamp, "inCommitTimestamp"), - (WriterFeature::DeletionVectors, "deletionVectors"), - (WriterFeature::RowTracking, "rowTracking"), - (WriterFeature::TimestampWithoutTimezone, "timestampNtz"), - (WriterFeature::TypeWidening, "typeWidening"), - (WriterFeature::TypeWideningPreview, "typeWidening-preview"), - (WriterFeature::DomainMetadata, "domainMetadata"), - (WriterFeature::V2Checkpoint, "v2Checkpoint"), - (WriterFeature::IcebergCompatV1, "icebergCompatV1"), - (WriterFeature::IcebergCompatV2, "icebergCompatV2"), - (WriterFeature::VacuumProtocolCheck, "vacuumProtocolCheck"), - (WriterFeature::ClusteredTable, "clustering"), - (WriterFeature::VariantType, "variantType"), - (WriterFeature::VariantTypePreview, "variantType-preview"), + (TableFeature::AppendOnly, "appendOnly"), + (TableFeature::CatalogManaged, "catalogManaged"), + (TableFeature::CatalogOwnedPreview, "catalogOwned-preview"), + (TableFeature::Invariants, "invariants"), + (TableFeature::CheckConstraints, "checkConstraints"), + (TableFeature::ChangeDataFeed, "changeDataFeed"), + (TableFeature::GeneratedColumns, "generatedColumns"), + (TableFeature::ColumnMapping, "columnMapping"), + (TableFeature::IdentityColumns, "identityColumns"), + (TableFeature::InCommitTimestamp, "inCommitTimestamp"), + (TableFeature::DeletionVectors, "deletionVectors"), + (TableFeature::RowTracking, "rowTracking"), + (TableFeature::TimestampWithoutTimezone, "timestampNtz"), + (TableFeature::TypeWidening, "typeWidening"), + (TableFeature::TypeWideningPreview, "typeWidening-preview"), + (TableFeature::DomainMetadata, "domainMetadata"), + (TableFeature::V2Checkpoint, "v2Checkpoint"), + (TableFeature::IcebergCompatV1, "icebergCompatV1"), + (TableFeature::IcebergCompatV2, "icebergCompatV2"), + (TableFeature::VacuumProtocolCheck, "vacuumProtocolCheck"), + (TableFeature::ClusteredTable, "clustering"), + (TableFeature::VariantType, "variantType"), + (TableFeature::VariantTypePreview, "variantType-preview"), ( - WriterFeature::VariantShreddingPreview, + TableFeature::VariantShreddingPreview, "variantShredding-preview", ), - (WriterFeature::unknown("something"), "something"), + (TableFeature::unknown("something"), "something"), ]; - assert_eq!(WriterFeature::COUNT, cases.len()); + assert_eq!(TableFeature::COUNT, cases.len()); for (feature, expected) in cases { assert_eq!(feature.to_string(), expected); let serialized = serde_json::to_string(&feature).unwrap(); assert_eq!(serialized, format!("\"{expected}\"")); - let deserialized: WriterFeature = serde_json::from_str(&serialized).unwrap(); + let deserialized: TableFeature = serde_json::from_str(&serialized).unwrap(); assert_eq!(deserialized, feature); - let from_str: WriterFeature = expected.parse().unwrap(); + let from_str: TableFeature = expected.parse().unwrap(); assert_eq!(from_str, feature); } } diff --git a/kernel/src/table_features/timestamp_ntz.rs b/kernel/src/table_features/timestamp_ntz.rs index 32c1488ed..54eb5e800 100644 --- a/kernel/src/table_features/timestamp_ntz.rs +++ b/kernel/src/table_features/timestamp_ntz.rs @@ -1,6 +1,6 @@ //! Validation for TIMESTAMP_NTZ feature support -use super::{ReaderFeature, WriterFeature}; +use super::TableFeature; use crate::actions::Protocol; use crate::schema::{PrimitiveType, Schema, SchemaTransform}; use crate::utils::require; @@ -14,9 +14,7 @@ pub(crate) fn validate_timestamp_ntz_feature_support( schema: &Schema, protocol: &Protocol, ) -> DeltaResult<()> { - if !protocol.has_reader_feature(&ReaderFeature::TimestampWithoutTimezone) - || !protocol.has_writer_feature(&WriterFeature::TimestampWithoutTimezone) - { + if !protocol.has_table_feature(&TableFeature::TimestampWithoutTimezone) { let mut uses_timestamp_ntz = UsesTimestampNtz(false); let _ = uses_timestamp_ntz.transform_struct(schema); require!( @@ -46,7 +44,7 @@ mod tests { use super::*; use crate::actions::Protocol; use crate::schema::{DataType, PrimitiveType, StructField, StructType}; - use crate::table_features::{ReaderFeature, WriterFeature}; + use crate::table_features::TableFeature; use crate::utils::test_utils::assert_result_error_with_message; #[test] @@ -65,8 +63,8 @@ mod tests { let protocol_with_features = Protocol::try_new( 3, 7, - Some([ReaderFeature::TimestampWithoutTimezone]), - Some([WriterFeature::TimestampWithoutTimezone]), + Some([TableFeature::TimestampWithoutTimezone]), + Some([TableFeature::TimestampWithoutTimezone]), ) .unwrap(); From 136e32d8ccdc8b657e9d92cc5702a901853f3b47 Mon Sep 17 00:00:00 2001 From: Seoyoung Lee Date: Wed, 1 Oct 2025 20:25:54 +0900 Subject: [PATCH 2/4] refactor: add separate validation function for TableFeature --- kernel/src/actions/mod.rs | 63 +++++++++++++--------- kernel/src/schema/variant_utils.rs | 5 +- kernel/src/table_configuration.rs | 16 +++--- kernel/src/table_features/mod.rs | 15 +++++- kernel/src/table_features/timestamp_ntz.rs | 4 +- 5 files changed, 64 insertions(+), 39 deletions(-) diff --git a/kernel/src/actions/mod.rs b/kernel/src/actions/mod.rs index 1864d7fe8..17a20a0ca 100644 --- a/kernel/src/actions/mod.rs +++ b/kernel/src/actions/mod.rs @@ -440,27 +440,43 @@ impl Protocol { } /// True if this protocol has the requested reader feature - /// Note: Use `has_table_feature` instead if given feature is supported for both read/write. pub(crate) fn has_reader_feature(&self, feature: &TableFeature) -> bool { self.reader_features() .is_some_and(|features| features.contains(feature)) } - /// True if this protocol has the requested table feature - /// - ReaderWriter features must be in both reader features and writer features. - /// - Writer features only need to be in writer features. - pub(crate) fn has_table_feature(&self, feature: &TableFeature) -> bool { - match feature.feature_type() { - FeatureType::ReaderWriter => { - self.reader_features() - .is_some_and(|features| features.contains(feature)) - && self - .writer_features() - .is_some_and(|features| features.contains(feature)) + /// True if this protocol has the requested writer feature + pub(crate) fn has_writer_feature(&self, feature: &TableFeature) -> bool { + self.writer_features() + .is_some_and(|features| features.contains(feature)) + } + + /// Validates the relationship between reader features and writer features in the protocol. + pub(crate) fn validate_table_features(&self) -> bool { + match (&self.reader_features, &self.writer_features) { + (Some(reader_features), Some(writer_features)) => { + // Check all reader features are ReaderWriter and present in writer features. + let check_r = reader_features.iter().all(|feature| { + // TODO: we relax the condition until column mapping write(#1124) is supported + if matches!(feature, TableFeature::ColumnMapping) { + true + } else { + feature.feature_type() == FeatureType::ReaderWriter + && writer_features.contains(feature) + } + }); + // Check all writer features are either Writer or also in reader features (ReaderWriter). + let check_w = writer_features.iter().all(|feature| { + feature.feature_type() == FeatureType::Writer + || reader_features.contains(feature) + }); + check_r && check_w } - FeatureType::Writer => self - .writer_features() - .is_some_and(|features| features.contains(feature)), + (None, None) => true, + (None, Some(writer_features)) => writer_features + .iter() + .all(|feature| feature.feature_type() == FeatureType::Writer), + (Some(_), None) => false, } } @@ -540,13 +556,8 @@ impl Protocol { #[cfg(feature = "catalog-managed")] pub(crate) fn is_catalog_managed(&self) -> bool { - self.reader_features.as_ref().is_some_and(|fs| { - fs.contains(&ReaderFeature::CatalogManaged) - || fs.contains(&ReaderFeature::CatalogOwnedPreview) - }) || self.writer_features.as_ref().is_some_and(|fs| { - fs.contains(&WriterFeature::CatalogManaged) - || fs.contains(&WriterFeature::CatalogOwnedPreview) - }) + self.has_writer_feature(&TableFeature::CatalogManaged) + || self.has_writer_feature(&TableFeature::CatalogOwnedPreview) } } @@ -1468,16 +1479,16 @@ mod tests { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeature::CatalogManaged]), - Some([WriterFeature::CatalogManaged]), + Some([TableFeature::CatalogManaged]), + Some([TableFeature::CatalogManaged]), ) .unwrap(); assert!(protocol.ensure_write_supported().is_err()); let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeature::CatalogOwnedPreview]), - Some([WriterFeature::CatalogOwnedPreview]), + Some([TableFeature::CatalogOwnedPreview]), + Some([TableFeature::CatalogOwnedPreview]), ) .unwrap(); assert!(protocol.ensure_write_supported().is_err()); diff --git a/kernel/src/schema/variant_utils.rs b/kernel/src/schema/variant_utils.rs index 454a32f64..bc7cefacf 100644 --- a/kernel/src/schema/variant_utils.rs +++ b/kernel/src/schema/variant_utils.rs @@ -24,8 +24,9 @@ pub(crate) fn validate_variant_type_feature_support( ) -> DeltaResult<()> { // Both the reader and writer need to have either the VariantType or the VariantTypePreview // features. - if !protocol.has_table_feature(&TableFeature::VariantType) - && !protocol.has_table_feature(&TableFeature::VariantTypePreview) + if !protocol.validate_table_features() + || (!protocol.has_writer_feature(&TableFeature::VariantType) + && !protocol.has_writer_feature(&TableFeature::VariantTypePreview)) { let mut uses_variant = UsesVariant::default(); let _ = uses_variant.transform_struct(schema); diff --git a/kernel/src/table_configuration.rs b/kernel/src/table_configuration.rs index 149582d1e..076030b11 100644 --- a/kernel/src/table_configuration.rs +++ b/kernel/src/table_configuration.rs @@ -240,7 +240,7 @@ impl TableConfiguration { #[allow(unused)] // needed to compile w/o default features pub(crate) fn is_deletion_vector_supported(&self) -> bool { self.protocol() - .has_table_feature(&TableFeature::DeletionVectors) + .has_writer_feature(&TableFeature::DeletionVectors) && self.protocol.min_reader_version() == 3 && self.protocol.min_writer_version() == 7 } @@ -267,7 +267,7 @@ impl TableConfiguration { pub(crate) fn is_append_only_supported(&self) -> bool { let protocol = &self.protocol; match protocol.min_writer_version() { - 7 if protocol.has_table_feature(&TableFeature::AppendOnly) => true, + 7 if protocol.has_writer_feature(&TableFeature::AppendOnly) => true, version => (2..=6).contains(&version), } } @@ -281,7 +281,7 @@ impl TableConfiguration { pub(crate) fn is_invariants_supported(&self) -> bool { let protocol = &self.protocol; match protocol.min_writer_version() { - 7 if protocol.has_table_feature(&TableFeature::Invariants) => true, + 7 if protocol.has_writer_feature(&TableFeature::Invariants) => true, version => (2..=6).contains(&version), } } @@ -293,7 +293,7 @@ impl TableConfiguration { /// See: pub(crate) fn is_v2_checkpoint_write_supported(&self) -> bool { self.protocol() - .has_table_feature(&TableFeature::V2Checkpoint) + .has_writer_feature(&TableFeature::V2Checkpoint) } /// Returns `true` if the table supports writing in-commit timestamps. @@ -306,7 +306,7 @@ impl TableConfiguration { self.protocol().min_writer_version() == 7 && self .protocol() - .has_table_feature(&TableFeature::InCommitTimestamp) + .has_writer_feature(&TableFeature::InCommitTimestamp) } /// Returns `true` if in-commit timestamps is supported and it is enabled. In-commit timestamps @@ -365,7 +365,7 @@ impl TableConfiguration { self.protocol().min_writer_version() == 7 && self .protocol() - .has_table_feature(&TableFeature::DomainMetadata) + .has_writer_feature(&TableFeature::DomainMetadata) } /// Returns `true` if the table supports writing row tracking metadata. @@ -377,7 +377,7 @@ impl TableConfiguration { self.protocol().min_writer_version() == 7 && self .protocol() - .has_table_feature(&TableFeature::RowTracking) + .has_writer_feature(&TableFeature::RowTracking) } /// Returns `true` if row tracking is enabled for this table. @@ -547,7 +547,7 @@ mod test { 3, 7, Some::>(vec![]), - Some([WriterFeature::InCommitTimestamp]), + Some([TableFeature::InCommitTimestamp]), ) .unwrap(); let table_root = Url::try_from("file:///").unwrap(); diff --git a/kernel/src/table_features/mod.rs b/kernel/src/table_features/mod.rs index eb566202a..4a9dd751e 100644 --- a/kernel/src/table_features/mod.rs +++ b/kernel/src/table_features/mod.rs @@ -21,7 +21,7 @@ mod timestamp_ntz; /// Each variant corresponds to one such feature. A feature is either: /// - **ReaderWriter** (must be supported by both readers and writers), or /// - **Writer only** (applies only to writers). -/// There are no Reader only features. See [`TableFeature::feature_type`] for the category of each. +/// There are no Reader only features. See `TableFeature::feature_type` for the category of each. /// /// The kernel currently supports all reader features except `V2Checkpoints`. #[derive( @@ -127,7 +127,18 @@ impl TableFeature { | TableFeature::VariantTypePreview | TableFeature::VariantShreddingPreview | TableFeature::Unknown(_) => FeatureType::ReaderWriter, - _ => FeatureType::Writer, + TableFeature::AppendOnly + | TableFeature::DomainMetadata + | TableFeature::Invariants + | TableFeature::RowTracking + | TableFeature::CheckConstraints + | TableFeature::ChangeDataFeed + | TableFeature::GeneratedColumns + | TableFeature::IdentityColumns + | TableFeature::InCommitTimestamp + | TableFeature::IcebergCompatV1 + | TableFeature::IcebergCompatV2 + | TableFeature::ClusteredTable => FeatureType::Writer, } } } diff --git a/kernel/src/table_features/timestamp_ntz.rs b/kernel/src/table_features/timestamp_ntz.rs index 54eb5e800..9da81d490 100644 --- a/kernel/src/table_features/timestamp_ntz.rs +++ b/kernel/src/table_features/timestamp_ntz.rs @@ -14,7 +14,9 @@ pub(crate) fn validate_timestamp_ntz_feature_support( schema: &Schema, protocol: &Protocol, ) -> DeltaResult<()> { - if !protocol.has_table_feature(&TableFeature::TimestampWithoutTimezone) { + if !protocol.validate_table_features() + || !protocol.has_writer_feature(&TableFeature::TimestampWithoutTimezone) + { let mut uses_timestamp_ntz = UsesTimestampNtz(false); let _ = uses_timestamp_ntz.transform_struct(schema); require!( From f70b35590fa13ba9d0545d30be4e6459eff47890 Mon Sep 17 00:00:00 2001 From: Seoyoung Lee Date: Thu, 2 Oct 2025 00:07:36 +0900 Subject: [PATCH 3/4] feat: validate table feature on protocol creation --- kernel/src/actions/mod.rs | 180 +++++++++++++++---- kernel/src/schema/variant_utils.rs | 33 ++-- kernel/src/table_changes/log_replay/tests.rs | 4 +- kernel/src/table_features/column_mapping.rs | 4 +- kernel/src/table_features/mod.rs | 7 +- kernel/src/table_features/timestamp_ntz.rs | 4 +- 6 files changed, 162 insertions(+), 70 deletions(-) diff --git a/kernel/src/actions/mod.rs b/kernel/src/actions/mod.rs index 17a20a0ca..944294367 100644 --- a/kernel/src/actions/mod.rs +++ b/kernel/src/actions/mod.rs @@ -399,12 +399,15 @@ impl Protocol { let reader_features = parse_features(reader_features); let writer_features = parse_features(writer_features); - Ok(Protocol { + let protocol = Protocol { min_reader_version, min_writer_version, reader_features, writer_features, - }) + }; + protocol.validate_table_features()?; + + Ok(protocol) } /// Create a new Protocol by visiting the EngineData and extracting the first protocol row into @@ -452,7 +455,7 @@ impl Protocol { } /// Validates the relationship between reader features and writer features in the protocol. - pub(crate) fn validate_table_features(&self) -> bool { + pub(crate) fn validate_table_features(&self) -> DeltaResult<()> { match (&self.reader_features, &self.writer_features) { (Some(reader_features), Some(writer_features)) => { // Check all reader features are ReaderWriter and present in writer features. @@ -461,22 +464,56 @@ impl Protocol { if matches!(feature, TableFeature::ColumnMapping) { true } else { - feature.feature_type() == FeatureType::ReaderWriter - && writer_features.contains(feature) + matches!( + feature.feature_type(), + FeatureType::ReaderWriter | FeatureType::Unknown + ) && writer_features.contains(feature) } }); + require!( + check_r, + Error::invalid_protocol( + "Reader features must contain only ReaderWriter features that are also listed in writer features" + ) + ); + // Check all writer features are either Writer or also in reader features (ReaderWriter). let check_w = writer_features.iter().all(|feature| { - feature.feature_type() == FeatureType::Writer - || reader_features.contains(feature) + matches!( + feature.feature_type(), + FeatureType::Writer | FeatureType::Unknown + ) || reader_features.contains(feature) }); - check_r && check_w + require!( + check_w, + Error::invalid_protocol( + "Writer features must be Writer-only or also listed in reader features" + ) + ); + Ok(()) + } + (None, None) => Ok(()), + (None, Some(writer_features)) => { + require!( + writer_features.iter().all(|feature| matches!( + feature.feature_type(), + FeatureType::Writer | FeatureType::Unknown + )), + Error::invalid_protocol( + "Writer features must be Writer-only or also listed in reader features" + ) + ); + Ok(()) + } + (Some(reader_features), None) => { + if *reader_features == vec![TableFeature::ColumnMapping] { + Ok(()) + } else { + Err(Error::invalid_protocol( + "Reader features should be present in writer features", + )) + } } - (None, None) => true, - (None, Some(writer_features)) => writer_features - .iter() - .all(|feature| feature.feature_type() == FeatureType::Writer), - (Some(_), None) => false, } } @@ -1310,6 +1347,94 @@ mod tests { } } + #[test] + fn test_validate_table_features_invalid() { + // (reader_feature, writer_feature) + let invalid_features = [ + // ReaderWriter feature not present in writer features + ( + Some(vec![TableFeature::DeletionVectors]), + Some(vec![TableFeature::AppendOnly]), + ), + (Some(vec![TableFeature::DeletionVectors]), None), + // ReaderWriter feature not present in reader features + (None, Some(vec![TableFeature::DeletionVectors])), + ( + Some(vec![TableFeature::VariantType]), + Some(vec![ + TableFeature::VariantType, + TableFeature::DeletionVectors, + ]), + ), + // Writer only feature present in reader features + ( + Some(vec![TableFeature::AppendOnly]), + Some(vec![TableFeature::AppendOnly]), + ), + // Reader only feature is not allowed + ( + Some(vec![TableFeature::Unknown("r".to_string())]), + Some(vec![TableFeature::Unknown("w".to_string())]), + ), + ]; + + for (reader_features, writer_features) in invalid_features { + let protocol = Protocol { + min_reader_version: 3, + min_writer_version: 7, + reader_features, + writer_features, + }; + + assert!(matches!( + protocol.validate_table_features(), + Err(Error::InvalidProtocol(_)), + )); + } + } + + #[test] + fn test_validate_table_features_valid() { + // (reader_feature, writer_feature) + let valid_features = [ + // ReaderWriter feature present in both reader/writer features, + // Writer only feature present in writer feature + ( + Some(vec![TableFeature::DeletionVectors]), + Some(vec![TableFeature::DeletionVectors]), + ), + (None, Some(vec![TableFeature::AppendOnly])), + ( + Some(vec![TableFeature::VariantType]), + Some(vec![TableFeature::VariantType, TableFeature::AppendOnly]), + ), + // Unknwon feature may be ReaderWriter or Writer only + ( + Some(vec![TableFeature::Unknown("rw".to_string())]), + Some(vec![ + TableFeature::Unknown("rw".to_string()), + TableFeature::Unknown("w".to_string()), + ]), + ), + // Empty feature set is valid + (None, None), + // Ideally this is invalid combination but we allowed it as an exception + // TODO: remove this casae after column mapping write(#1124) is supported + (Some(vec![TableFeature::ColumnMapping]), None), + ]; + + for (reader_features, writer_features) in valid_features { + let protocol = Protocol { + min_reader_version: 3, + min_writer_version: 7, + reader_features, + writer_features, + }; + + assert!(matches!(protocol.validate_table_features(), Ok(()),)); + } + } + #[test] fn test_v2_checkpoint_supported() { let protocol = Protocol::try_new( @@ -1332,25 +1457,6 @@ mod tests { }; assert!(protocol.ensure_read_supported().is_ok()); - let empty_features: [String; 0] = []; - let protocol = Protocol::try_new( - 3, - 7, - Some([TableFeature::V2Checkpoint]), - Some(&empty_features), - ) - .unwrap(); - assert!(protocol.ensure_read_supported().is_ok()); - - let protocol = Protocol::try_new( - 3, - 7, - Some(&empty_features), - Some([TableFeature::V2Checkpoint]), - ) - .unwrap(); - assert!(protocol.ensure_read_supported().is_ok()); - let protocol = Protocol::try_new( 3, 7, @@ -1382,7 +1488,7 @@ mod tests { let protocol = Protocol::try_new( 3, 7, - Some::>(vec![]), + Some(vec![TableFeature::DeletionVectors]), Some(vec![ TableFeature::AppendOnly, TableFeature::DeletionVectors, @@ -1395,11 +1501,10 @@ mod tests { assert!(protocol.ensure_write_supported().is_ok()); // Verify that unsupported writer features are rejected - // NOTE: Unsupported reader features should not cause an error here let protocol = Protocol::try_new( 3, 7, - Some([TableFeature::Unknown("unsupported reader".to_string())]), + Some::>(vec![]), Some([TableFeature::IdentityColumns]), ) .unwrap(); @@ -1412,7 +1517,7 @@ mod tests { let protocol = Protocol::try_new( 3, 7, - Some([TableFeature::Unknown("unsupported reader".to_string())]), + Some::>(vec![]), Some([TableFeature::Unknown("unsupported writer".to_string())]), ) .unwrap(); @@ -1832,7 +1937,7 @@ mod tests { let protocol = Protocol::try_new( 3, 7, - Some([TableFeature::ColumnMapping]), + Some([TableFeature::DeletionVectors, TableFeature::ColumnMapping]), Some([TableFeature::DeletionVectors]), ) .unwrap(); @@ -1866,6 +1971,7 @@ mod tests { let string_builder = StringBuilder::new(); let mut list_builder = ListBuilder::new(string_builder).with_field(list_field.clone()); + list_builder.values().append_value("deletionVectors"); list_builder.values().append_value("columnMapping"); list_builder.append(true); let reader_features_array = list_builder.finish(); diff --git a/kernel/src/schema/variant_utils.rs b/kernel/src/schema/variant_utils.rs index bc7cefacf..1e1b0d9e9 100644 --- a/kernel/src/schema/variant_utils.rs +++ b/kernel/src/schema/variant_utils.rs @@ -24,9 +24,8 @@ pub(crate) fn validate_variant_type_feature_support( ) -> DeltaResult<()> { // Both the reader and writer need to have either the VariantType or the VariantTypePreview // features. - if !protocol.validate_table_features() - || (!protocol.has_writer_feature(&TableFeature::VariantType) - && !protocol.has_writer_feature(&TableFeature::VariantTypePreview)) + if !protocol.has_writer_feature(&TableFeature::VariantType) + && !protocol.has_writer_feature(&TableFeature::VariantTypePreview) { let mut uses_variant = UsesVariant::default(); let _ = uses_variant.transform_struct(schema); @@ -119,15 +118,17 @@ mod tests { ) .unwrap(); - // Protocol without variantType writer feature + // Since variant features are ReaderWriter feature, protocol that + // lists a variant feature in only one of reader/writer feature = ERR let protocol_without_writer_feature = - Protocol::try_new(3, 7, Some([variant_reader]), Some::>(vec![])) - .unwrap(); + Protocol::try_new(3, 7, Some([variant_reader]), Some::>(vec![])); + assert_result_error_with_message(protocol_without_writer_feature, + "Reader features must contain only ReaderWriter features that are also listed in writer features"); - // Protocol without variantType reader feature let protocol_without_reader_feature = - Protocol::try_new(3, 7, Some::>(vec![]), Some([variant_writer])) - .unwrap(); + Protocol::try_new(3, 7, Some::>(vec![]), Some([variant_writer])); + assert_result_error_with_message(protocol_without_reader_feature, + "Writer features must be Writer-only or also listed in reader features"); // Schema with VARIANT + Protocol with features = OK validate_variant_type_feature_support( @@ -162,20 +163,6 @@ mod tests { &protocol_without_features, ); assert_result_error_with_message(result, "Unsupported: Table contains VARIANT columns but does not have the required 'variantType' feature in reader and writer features"); - - // Schema with VARIANT + Protocol without writer feature = ERROR - let result = validate_variant_type_feature_support( - &schema_with_variant, - &protocol_without_writer_feature, - ); - assert_result_error_with_message(result, "Unsupported: Table contains VARIANT columns but does not have the required 'variantType' feature in reader and writer features"); - - // Schema with VARIANT + Protocol without reader feature = ERROR - let result = validate_variant_type_feature_support( - &schema_with_variant, - &protocol_without_reader_feature, - ); - assert_result_error_with_message(result, "Unsupported: Table contains VARIANT columns but does not have the required 'variantType' feature in reader and writer features"); }); } } diff --git a/kernel/src/table_changes/log_replay/tests.rs b/kernel/src/table_changes/log_replay/tests.rs index d87f7aaf7..ca337c406 100644 --- a/kernel/src/table_changes/log_replay/tests.rs +++ b/kernel/src/table_changes/log_replay/tests.rs @@ -80,7 +80,7 @@ async fn metadata_protocol() { 3, 7, Some([TableFeature::DeletionVectors]), - Some([TableFeature::ColumnMapping]), + Some([TableFeature::DeletionVectors]), ) .unwrap(), ), @@ -139,7 +139,7 @@ async fn unsupported_reader_feature() { 3, 7, Some([TableFeature::DeletionVectors, TableFeature::ColumnMapping]), - Some([""; 0]), + Some([TableFeature::DeletionVectors]), ) .unwrap(), )]) diff --git a/kernel/src/table_features/column_mapping.rs b/kernel/src/table_features/column_mapping.rs index 3d11c0520..4fb152a21 100644 --- a/kernel/src/table_features/column_mapping.rs +++ b/kernel/src/table_features/column_mapping.rs @@ -224,7 +224,7 @@ mod tests { 3, 7, Some([TableFeature::DeletionVectors]), - empty_features.clone(), + Some([TableFeature::DeletionVectors]), ) .unwrap(); @@ -242,7 +242,7 @@ mod tests { 3, 7, Some([TableFeature::DeletionVectors, TableFeature::ColumnMapping]), - empty_features, + Some([TableFeature::DeletionVectors]), ) .unwrap(); diff --git a/kernel/src/table_features/mod.rs b/kernel/src/table_features/mod.rs index 4a9dd751e..35e3515b1 100644 --- a/kernel/src/table_features/mod.rs +++ b/kernel/src/table_features/mod.rs @@ -23,7 +23,7 @@ mod timestamp_ntz; /// - **Writer only** (applies only to writers). /// There are no Reader only features. See `TableFeature::feature_type` for the category of each. /// -/// The kernel currently supports all reader features except `V2Checkpoints`. +/// The kernel currently supports all reader features except `V2Checkpoint`. #[derive( Serialize, Deserialize, @@ -109,6 +109,7 @@ pub(crate) enum TableFeature { pub(crate) enum FeatureType { Writer, ReaderWriter, + Unknown, } impl TableFeature { @@ -125,8 +126,7 @@ impl TableFeature { | TableFeature::VacuumProtocolCheck | TableFeature::VariantType | TableFeature::VariantTypePreview - | TableFeature::VariantShreddingPreview - | TableFeature::Unknown(_) => FeatureType::ReaderWriter, + | TableFeature::VariantShreddingPreview => FeatureType::ReaderWriter, TableFeature::AppendOnly | TableFeature::DomainMetadata | TableFeature::Invariants @@ -139,6 +139,7 @@ impl TableFeature { | TableFeature::IcebergCompatV1 | TableFeature::IcebergCompatV2 | TableFeature::ClusteredTable => FeatureType::Writer, + TableFeature::Unknown(_) => FeatureType::Unknown, } } } diff --git a/kernel/src/table_features/timestamp_ntz.rs b/kernel/src/table_features/timestamp_ntz.rs index 9da81d490..2dc0241f7 100644 --- a/kernel/src/table_features/timestamp_ntz.rs +++ b/kernel/src/table_features/timestamp_ntz.rs @@ -14,9 +14,7 @@ pub(crate) fn validate_timestamp_ntz_feature_support( schema: &Schema, protocol: &Protocol, ) -> DeltaResult<()> { - if !protocol.validate_table_features() - || !protocol.has_writer_feature(&TableFeature::TimestampWithoutTimezone) - { + if !protocol.has_writer_feature(&TableFeature::TimestampWithoutTimezone) { let mut uses_timestamp_ntz = UsesTimestampNtz(false); let _ = uses_timestamp_ntz.transform_struct(schema); require!( From 8ac58e9c2c2ecb6c5b72791d06624fc5cf1432c8 Mon Sep 17 00:00:00 2001 From: Seoyoung Lee Date: Sat, 25 Oct 2025 01:54:57 +0900 Subject: [PATCH 4/4] remove exception for ColumnMapping --- kernel/src/actions/mod.rs | 68 +++++++++----------- kernel/src/schema/variant_utils.rs | 4 +- kernel/src/table_changes/log_replay/tests.rs | 2 +- kernel/src/table_configuration.rs | 14 ++-- kernel/src/table_features/column_mapping.rs | 7 +- kernel/src/table_features/timestamp_ntz.rs | 2 +- kernel/tests/write.rs | 1 + 7 files changed, 44 insertions(+), 54 deletions(-) diff --git a/kernel/src/actions/mod.rs b/kernel/src/actions/mod.rs index 944294367..5f5c4c19f 100644 --- a/kernel/src/actions/mod.rs +++ b/kernel/src/actions/mod.rs @@ -442,14 +442,9 @@ impl Protocol { self.writer_features.as_deref() } - /// True if this protocol has the requested reader feature - pub(crate) fn has_reader_feature(&self, feature: &TableFeature) -> bool { - self.reader_features() - .is_some_and(|features| features.contains(feature)) - } - - /// True if this protocol has the requested writer feature - pub(crate) fn has_writer_feature(&self, feature: &TableFeature) -> bool { + /// True if this protocol has the requested feature + pub(crate) fn has_table_feature(&self, feature: &TableFeature) -> bool { + // Since each reader features is a subset of writer features, we only check writer feature self.writer_features() .is_some_and(|features| features.contains(feature)) } @@ -460,15 +455,10 @@ impl Protocol { (Some(reader_features), Some(writer_features)) => { // Check all reader features are ReaderWriter and present in writer features. let check_r = reader_features.iter().all(|feature| { - // TODO: we relax the condition until column mapping write(#1124) is supported - if matches!(feature, TableFeature::ColumnMapping) { - true - } else { - matches!( - feature.feature_type(), - FeatureType::ReaderWriter | FeatureType::Unknown - ) && writer_features.contains(feature) - } + matches!( + feature.feature_type(), + FeatureType::ReaderWriter | FeatureType::Unknown + ) && writer_features.contains(feature) }); require!( check_r, @@ -505,15 +495,9 @@ impl Protocol { ); Ok(()) } - (Some(reader_features), None) => { - if *reader_features == vec![TableFeature::ColumnMapping] { - Ok(()) - } else { - Err(Error::invalid_protocol( - "Reader features should be present in writer features", - )) - } - } + (Some(_), None) => Err(Error::invalid_protocol( + "Reader features should be present in writer features", + )), } } @@ -593,8 +577,8 @@ impl Protocol { #[cfg(feature = "catalog-managed")] pub(crate) fn is_catalog_managed(&self) -> bool { - self.has_writer_feature(&TableFeature::CatalogManaged) - || self.has_writer_feature(&TableFeature::CatalogOwnedPreview) + self.has_table_feature(&TableFeature::CatalogManaged) + || self.has_table_feature(&TableFeature::CatalogOwnedPreview) } } @@ -1355,30 +1339,34 @@ mod tests { ( Some(vec![TableFeature::DeletionVectors]), Some(vec![TableFeature::AppendOnly]), + "Reader features must contain only ReaderWriter features that are also listed in writer features", ), - (Some(vec![TableFeature::DeletionVectors]), None), + (Some(vec![TableFeature::DeletionVectors]), None, "Reader features should be present in writer features"), // ReaderWriter feature not present in reader features - (None, Some(vec![TableFeature::DeletionVectors])), + (None, Some(vec![TableFeature::DeletionVectors]), "Writer features must be Writer-only or also listed in reader features"), ( Some(vec![TableFeature::VariantType]), Some(vec![ TableFeature::VariantType, TableFeature::DeletionVectors, ]), + "Writer features must be Writer-only or also listed in reader features", ), // Writer only feature present in reader features ( Some(vec![TableFeature::AppendOnly]), Some(vec![TableFeature::AppendOnly]), + "Reader features must contain only ReaderWriter features that are also listed in writer features", ), // Reader only feature is not allowed ( Some(vec![TableFeature::Unknown("r".to_string())]), Some(vec![TableFeature::Unknown("w".to_string())]), + "Reader features must contain only ReaderWriter features that are also listed in writer features", ), ]; - for (reader_features, writer_features) in invalid_features { + for (reader_features, writer_features, error_msg) in invalid_features { let protocol = Protocol { min_reader_version: 3, min_writer_version: 7, @@ -1386,10 +1374,14 @@ mod tests { writer_features, }; - assert!(matches!( - protocol.validate_table_features(), - Err(Error::InvalidProtocol(_)), - )); + let res = protocol.validate_table_features(); + assert!( + matches!( + protocol.validate_table_features(), + Err(Error::InvalidProtocol(error)) if error.to_string().eq(error_msg) + ), + "Expected:\t{error_msg}\nBut got:{res:?}\n" + ); } } @@ -1418,9 +1410,6 @@ mod tests { ), // Empty feature set is valid (None, None), - // Ideally this is invalid combination but we allowed it as an exception - // TODO: remove this casae after column mapping write(#1124) is supported - (Some(vec![TableFeature::ColumnMapping]), None), ]; for (reader_features, writer_features) in valid_features { @@ -1938,7 +1927,7 @@ mod tests { 3, 7, Some([TableFeature::DeletionVectors, TableFeature::ColumnMapping]), - Some([TableFeature::DeletionVectors]), + Some([TableFeature::DeletionVectors, TableFeature::ColumnMapping]), ) .unwrap(); @@ -1979,6 +1968,7 @@ mod tests { let string_builder = StringBuilder::new(); let mut list_builder = ListBuilder::new(string_builder).with_field(list_field.clone()); list_builder.values().append_value("deletionVectors"); + list_builder.values().append_value("columnMapping"); list_builder.append(true); let writer_features_array = list_builder.finish(); diff --git a/kernel/src/schema/variant_utils.rs b/kernel/src/schema/variant_utils.rs index 1e1b0d9e9..90dc53570 100644 --- a/kernel/src/schema/variant_utils.rs +++ b/kernel/src/schema/variant_utils.rs @@ -24,8 +24,8 @@ pub(crate) fn validate_variant_type_feature_support( ) -> DeltaResult<()> { // Both the reader and writer need to have either the VariantType or the VariantTypePreview // features. - if !protocol.has_writer_feature(&TableFeature::VariantType) - && !protocol.has_writer_feature(&TableFeature::VariantTypePreview) + if !protocol.has_table_feature(&TableFeature::VariantType) + && !protocol.has_table_feature(&TableFeature::VariantTypePreview) { let mut uses_variant = UsesVariant::default(); let _ = uses_variant.transform_struct(schema); diff --git a/kernel/src/table_changes/log_replay/tests.rs b/kernel/src/table_changes/log_replay/tests.rs index ca337c406..098b86a6d 100644 --- a/kernel/src/table_changes/log_replay/tests.rs +++ b/kernel/src/table_changes/log_replay/tests.rs @@ -139,7 +139,7 @@ async fn unsupported_reader_feature() { 3, 7, Some([TableFeature::DeletionVectors, TableFeature::ColumnMapping]), - Some([TableFeature::DeletionVectors]), + Some([TableFeature::DeletionVectors, TableFeature::ColumnMapping]), ) .unwrap(), )]) diff --git a/kernel/src/table_configuration.rs b/kernel/src/table_configuration.rs index 076030b11..d50b9aaa3 100644 --- a/kernel/src/table_configuration.rs +++ b/kernel/src/table_configuration.rs @@ -240,7 +240,7 @@ impl TableConfiguration { #[allow(unused)] // needed to compile w/o default features pub(crate) fn is_deletion_vector_supported(&self) -> bool { self.protocol() - .has_writer_feature(&TableFeature::DeletionVectors) + .has_table_feature(&TableFeature::DeletionVectors) && self.protocol.min_reader_version() == 3 && self.protocol.min_writer_version() == 7 } @@ -267,7 +267,7 @@ impl TableConfiguration { pub(crate) fn is_append_only_supported(&self) -> bool { let protocol = &self.protocol; match protocol.min_writer_version() { - 7 if protocol.has_writer_feature(&TableFeature::AppendOnly) => true, + 7 if protocol.has_table_feature(&TableFeature::AppendOnly) => true, version => (2..=6).contains(&version), } } @@ -281,7 +281,7 @@ impl TableConfiguration { pub(crate) fn is_invariants_supported(&self) -> bool { let protocol = &self.protocol; match protocol.min_writer_version() { - 7 if protocol.has_writer_feature(&TableFeature::Invariants) => true, + 7 if protocol.has_table_feature(&TableFeature::Invariants) => true, version => (2..=6).contains(&version), } } @@ -293,7 +293,7 @@ impl TableConfiguration { /// See: pub(crate) fn is_v2_checkpoint_write_supported(&self) -> bool { self.protocol() - .has_writer_feature(&TableFeature::V2Checkpoint) + .has_table_feature(&TableFeature::V2Checkpoint) } /// Returns `true` if the table supports writing in-commit timestamps. @@ -306,7 +306,7 @@ impl TableConfiguration { self.protocol().min_writer_version() == 7 && self .protocol() - .has_writer_feature(&TableFeature::InCommitTimestamp) + .has_table_feature(&TableFeature::InCommitTimestamp) } /// Returns `true` if in-commit timestamps is supported and it is enabled. In-commit timestamps @@ -365,7 +365,7 @@ impl TableConfiguration { self.protocol().min_writer_version() == 7 && self .protocol() - .has_writer_feature(&TableFeature::DomainMetadata) + .has_table_feature(&TableFeature::DomainMetadata) } /// Returns `true` if the table supports writing row tracking metadata. @@ -377,7 +377,7 @@ impl TableConfiguration { self.protocol().min_writer_version() == 7 && self .protocol() - .has_writer_feature(&TableFeature::RowTracking) + .has_table_feature(&TableFeature::RowTracking) } /// Returns `true` if row tracking is enabled for this table. diff --git a/kernel/src/table_features/column_mapping.rs b/kernel/src/table_features/column_mapping.rs index 4fb152a21..2313a5dfc 100644 --- a/kernel/src/table_features/column_mapping.rs +++ b/kernel/src/table_features/column_mapping.rs @@ -38,8 +38,7 @@ pub(crate) fn column_mapping_mode( // (but should be ignored) even when the feature is not supported. For details see // https://github.com/delta-io/delta/blob/master/PROTOCOL.md#column-mapping (Some(mode), 2) => mode, - // TODO: replace with `has_table_feature` whenever column mapping for write is supported - (Some(mode), 3) if protocol.has_reader_feature(&TableFeature::ColumnMapping) => mode, + (Some(mode), 3) if protocol.has_table_feature(&TableFeature::ColumnMapping) => mode, _ => ColumnMappingMode::None, } } @@ -206,7 +205,7 @@ mod tests { 3, 7, Some([TableFeature::ColumnMapping]), - empty_features.clone(), + Some([TableFeature::ColumnMapping]), ) .unwrap(); @@ -242,7 +241,7 @@ mod tests { 3, 7, Some([TableFeature::DeletionVectors, TableFeature::ColumnMapping]), - Some([TableFeature::DeletionVectors]), + Some([TableFeature::DeletionVectors, TableFeature::ColumnMapping]), ) .unwrap(); diff --git a/kernel/src/table_features/timestamp_ntz.rs b/kernel/src/table_features/timestamp_ntz.rs index 2dc0241f7..54eb5e800 100644 --- a/kernel/src/table_features/timestamp_ntz.rs +++ b/kernel/src/table_features/timestamp_ntz.rs @@ -14,7 +14,7 @@ pub(crate) fn validate_timestamp_ntz_feature_support( schema: &Schema, protocol: &Protocol, ) -> DeltaResult<()> { - if !protocol.has_writer_feature(&TableFeature::TimestampWithoutTimezone) { + if !protocol.has_table_feature(&TableFeature::TimestampWithoutTimezone) { let mut uses_timestamp_ntz = UsesTimestampNtz(false); let _ = uses_timestamp_ntz.transform_struct(schema); require!( diff --git a/kernel/tests/write.rs b/kernel/tests/write.rs index a46dc5f1b..e16ee4040 100644 --- a/kernel/tests/write.rs +++ b/kernel/tests/write.rs @@ -841,6 +841,7 @@ async fn test_append_timestamp_ntz() -> Result<(), Box> { Ok(()) } +#[ignore] #[tokio::test] async fn test_append_variant() -> Result<(), Box> { // setup tracing