diff --git a/kernel/src/actions/crc.rs b/kernel/src/actions/crc.rs index e9265b180..655340e8c 100644 --- a/kernel/src/actions/crc.rs +++ b/kernel/src/actions/crc.rs @@ -150,7 +150,7 @@ mod tests { use crate::engine::sync::SyncEngine; use crate::schema::derive_macro_utils::ToDataType as _; use crate::schema::{ArrayType, DataType, StructField, StructType}; - use crate::table_features::{ReaderFeature, WriterFeature}; + use crate::table_features::TableFeature; use crate::utils::test_utils::string_array_to_engine_data; use crate::Engine; @@ -252,8 +252,8 @@ mod tests { let expected_protocol = Protocol { min_reader_version: 3, min_writer_version: 7, - reader_features: Some(vec![ReaderFeature::ColumnMapping]), - writer_features: Some(vec![WriterFeature::ColumnMapping]), + reader_features: Some(vec![TableFeature::ColumnMapping]), + writer_features: Some(vec![TableFeature::ColumnMapping]), }; let expected_metadata = Metadata { id: "testId".to_string(), diff --git a/kernel/src/actions/mod.rs b/kernel/src/actions/mod.rs index 77836d27b..e6cf433f2 100644 --- a/kernel/src/actions/mod.rs +++ b/kernel/src/actions/mod.rs @@ -13,7 +13,7 @@ use crate::schema::{ ArrayType, DataType, MapType, SchemaRef, StructField, StructType, ToSchema as _, }; use crate::table_features::{ - ReaderFeature, WriterFeature, SUPPORTED_READER_FEATURES, SUPPORTED_WRITER_FEATURES, + FeatureType, TableFeature, SUPPORTED_READER_FEATURES, SUPPORTED_WRITER_FEATURES, }; use crate::table_properties::TableProperties; use crate::utils::require; @@ -348,11 +348,11 @@ pub(crate) struct Protocol { /// A collection of features that a client must implement in order to correctly /// read this table (exist only when minReaderVersion is set to 3) #[serde(skip_serializing_if = "Option::is_none")] - reader_features: Option>, + reader_features: Option>, /// A collection of features that a client must implement in order to correctly /// write this table (exist only when minWriterVersion is set to 7) #[serde(skip_serializing_if = "Option::is_none")] - writer_features: Option>, + writer_features: Option>, } fn parse_features(features: Option>) -> Option> @@ -379,32 +379,18 @@ impl Protocol { reader_features: Option>, writer_features: Option>, ) -> DeltaResult { - if min_reader_version == 3 { - require!( - reader_features.is_some(), - Error::invalid_protocol( - "Reader features must be present when minimum reader version = 3" - ) - ); - } - if min_writer_version == 7 { - require!( - writer_features.is_some(), - Error::invalid_protocol( - "Writer features must be present when minimum writer version = 7" - ) - ); - } - let reader_features = parse_features(reader_features); let writer_features = parse_features(writer_features); - Ok(Protocol { + let protocol = Protocol { min_reader_version, min_writer_version, reader_features, writer_features, - }) + }; + protocol.validate_table_features()?; + + Ok(protocol) } /// Create a new Protocol by visiting the EngineData and extracting the first protocol row into @@ -429,26 +415,112 @@ impl Protocol { /// Get the reader features for the protocol #[internal_api] - pub(crate) fn reader_features(&self) -> Option<&[ReaderFeature]> { + pub(crate) fn reader_features(&self) -> Option<&[TableFeature]> { self.reader_features.as_deref() } /// Get the writer features for the protocol #[internal_api] - pub(crate) fn writer_features(&self) -> Option<&[WriterFeature]> { + pub(crate) fn writer_features(&self) -> Option<&[TableFeature]> { self.writer_features.as_deref() } - /// True if this protocol has the requested reader feature - pub(crate) fn has_reader_feature(&self, feature: &ReaderFeature) -> bool { - self.reader_features() + /// True if this protocol has the requested feature. + /// Since reader features are always a subset of writer features, we only check writer features. + pub(crate) fn has_table_feature(&self, feature: &TableFeature) -> bool { + self.writer_features() .is_some_and(|features| features.contains(feature)) } - /// True if this protocol has the requested writer feature - pub(crate) fn has_writer_feature(&self, feature: &WriterFeature) -> bool { - self.writer_features() - .is_some_and(|features| features.contains(feature)) + /// Validates the relationship between reader features and writer features in the protocol. + pub(crate) fn validate_table_features(&self) -> DeltaResult<()> { + // Validate reader version and features relationship + if self.min_reader_version == 3 { + require!( + self.reader_features.is_some(), + Error::invalid_protocol( + "Reader features must be present when minimum reader version = 3" + ) + ); + } else { + require!( + self.reader_features.is_none(), + Error::invalid_protocol( + "Reader features must not be present when minimum reader version != 3" + ) + ); + } + + // Validate writer version and features relationship + if self.min_writer_version == 7 { + require!( + self.writer_features.is_some(), + Error::invalid_protocol( + "Writer features must be present when minimum writer version = 7" + ) + ); + } else { + require!( + self.writer_features.is_none(), + Error::invalid_protocol( + "Writer features must not be present when minimum writer version != 7" + ) + ); + } + + match (&self.reader_features, &self.writer_features) { + (Some(reader_features), Some(writer_features)) => { + // Check all reader features are ReaderWriter and present in writer features. + let check_r = reader_features.iter().all(|feature| { + matches!( + feature.feature_type(), + FeatureType::ReaderWriter | FeatureType::Unknown + ) && writer_features.contains(feature) + }); + require!( + check_r, + Error::invalid_protocol( + "Reader features must contain only ReaderWriter features that are also listed in writer features" + ) + ); + + // Check all writer features that are ReaderWriter must also be in reader features + let check_w = writer_features + .iter() + .all(|feature| match feature.feature_type() { + FeatureType::Writer | FeatureType::Unknown => true, + FeatureType::ReaderWriter => reader_features.contains(feature), + }); + require!( + check_w, + Error::invalid_protocol( + "Writer features must be Writer-only or also listed in reader features" + ) + ); + Ok(()) + } + (None, None) => Ok(()), + (None, Some(writer_features)) => { + // This is valid when minReaderVersion < 3 and minWriterVersion = 7 + // with only Writer-only features (no ReaderWriter features) + let all_writer_only = writer_features.iter().all(|feature| { + matches!( + feature.feature_type(), + FeatureType::Writer | FeatureType::Unknown + ) + }); + require!( + all_writer_only, + Error::invalid_protocol( + "When reader features are not present, all writer features must be Writer-only" + ) + ); + Ok(()) + } + (Some(_reader_features), None) => Err(Error::invalid_protocol( + "Reader features should be present in writer features", + )), + } } /// Check if reading a table with this protocol is supported. That is: does the kernel support @@ -496,8 +568,8 @@ impl Protocol { ensure_supported_features(writer_features, &SUPPORTED_WRITER_FEATURES)?; // ensure that there is no illegal combination of features - if writer_features.contains(&WriterFeature::RowTracking) - && !writer_features.contains(&WriterFeature::DomainMetadata) + if writer_features.contains(&TableFeature::RowTracking) + && !writer_features.contains(&TableFeature::DomainMetadata) { Err(Error::invalid_protocol( "rowTracking feature requires domainMetadata to also be enabled", @@ -527,13 +599,8 @@ impl Protocol { #[cfg(feature = "catalog-managed")] pub(crate) fn is_catalog_managed(&self) -> bool { - self.reader_features.as_ref().is_some_and(|fs| { - fs.contains(&ReaderFeature::CatalogManaged) - || fs.contains(&ReaderFeature::CatalogOwnedPreview) - }) || self.writer_features.as_ref().is_some_and(|fs| { - fs.contains(&WriterFeature::CatalogManaged) - || fs.contains(&WriterFeature::CatalogOwnedPreview) - }) + self.has_table_feature(&TableFeature::CatalogManaged) + || self.has_table_feature(&TableFeature::CatalogOwnedPreview) } } @@ -1287,13 +1354,290 @@ mod tests { } } + #[test] + fn test_validate_table_features_valid_readerwriter() { + // Valid: ReaderWriter feature in both lists + let protocol = Protocol::try_new( + 3, + 7, + Some([TableFeature::DeletionVectors]), + Some([TableFeature::DeletionVectors]), + ) + .unwrap(); + assert!(protocol.validate_table_features().is_ok()); + + // Valid: Multiple ReaderWriter features in both lists + let protocol = Protocol::try_new( + 3, + 7, + Some([ + TableFeature::DeletionVectors, + TableFeature::V2Checkpoint, + TableFeature::TimestampWithoutTimezone, + ]), + Some([ + TableFeature::DeletionVectors, + TableFeature::V2Checkpoint, + TableFeature::TimestampWithoutTimezone, + ]), + ) + .unwrap(); + assert!(protocol.validate_table_features().is_ok()); + } + + #[test] + fn test_validate_table_features_valid_writer_only() { + // Valid: Writer-only feature in writer list only + let protocol = Protocol::try_new( + 3, + 7, + Some::>(vec![]), + Some([TableFeature::AppendOnly]), + ) + .unwrap(); + assert!(protocol.validate_table_features().is_ok()); + + // Valid: Multiple Writer-only features + let protocol = Protocol::try_new( + 3, + 7, + Some::>(vec![]), + Some([ + TableFeature::AppendOnly, + TableFeature::Invariants, + TableFeature::DomainMetadata, + ]), + ) + .unwrap(); + assert!(protocol.validate_table_features().is_ok()); + } + + #[test] + fn test_validate_table_features_valid_mixed() { + // Valid: Mix of Writer-only and ReaderWriter features + let protocol = Protocol::try_new( + 3, + 7, + Some([TableFeature::DeletionVectors]), + Some([ + TableFeature::AppendOnly, + TableFeature::DeletionVectors, + TableFeature::Invariants, + ]), + ) + .unwrap(); + assert!(protocol.validate_table_features().is_ok()); + } + + #[test] + fn test_validate_table_features_unknown_features() { + // Valid: Unknown features are allowed + let protocol = Protocol::try_new( + 3, + 7, + Some([TableFeature::Unknown("unknown-reader".to_string())]), + Some([TableFeature::Unknown("unknown-reader".to_string())]), + ) + .unwrap(); + assert!(protocol.validate_table_features().is_ok()); + + // Valid: Unknown writer feature + let protocol = Protocol::try_new( + 3, + 7, + Some::>(vec![]), + Some([TableFeature::Unknown("unknown-writer".to_string())]), + ) + .unwrap(); + assert!(protocol.validate_table_features().is_ok()); + } + + #[test] + fn test_validate_table_features_invalid_readerwriter_missing_in_writer() { + // Invalid: ReaderWriter feature in reader but not in writer + let result = Protocol::try_new( + 3, + 7, + Some([TableFeature::DeletionVectors]), + Some::>(vec![]), + ); + assert!(result.is_err()); + match result { + Err(Error::InvalidProtocol(msg)) if msg.contains("Reader features must contain only ReaderWriter features that are also listed in writer features") => {}, + _ => panic!("Expected InvalidProtocol error about missing writer feature"), + } + } + + #[test] + fn test_validate_table_features_invalid_readerwriter_missing_in_reader() { + // Invalid: ReaderWriter feature in writer but not in reader + let result = Protocol::try_new( + 3, + 7, + Some::>(vec![]), + Some([TableFeature::DeletionVectors]), + ); + assert!(result.is_err()); + match result { + Err(Error::InvalidProtocol(msg)) + if msg.contains( + "Writer features must be Writer-only or also listed in reader features", + ) => {} + _ => panic!("Expected InvalidProtocol error about missing reader feature"), + } + } + + #[test] + fn test_validate_table_features_invalid_writer_only_in_reader() { + // Invalid: Writer-only feature in reader list + let result = Protocol::try_new( + 3, + 7, + Some([TableFeature::AppendOnly]), + Some([TableFeature::AppendOnly]), + ); + assert!(result.is_err()); + match result { + Err(Error::InvalidProtocol(msg)) + if msg.contains("Reader features must contain only ReaderWriter features") => {} + _ => panic!("Expected InvalidProtocol error about writer-only feature in reader list"), + } + } + + #[test] + fn test_validate_table_features_no_features() { + // Valid: No features at all (legacy protocol) + let protocol = Protocol { + min_reader_version: 1, + min_writer_version: 1, + reader_features: None, + writer_features: None, + }; + assert!(protocol.validate_table_features().is_ok()); + } + + #[test] + fn test_validate_table_features_invalid_reader_without_writer() { + // Invalid: Reader features present but writer features absent with version 7 + // This fails because version 7 requires writer_features to be Some + let result = Protocol::try_new( + 3, + 7, + Some([TableFeature::DeletionVectors]), + None::>, + ); + assert!(result.is_err()); + match result { + Err(Error::InvalidProtocol(msg)) + if msg.contains( + "Writer features must be present when minimum writer version = 7", + ) => {} + _ => panic!("Expected InvalidProtocol error about missing writer features"), + } + } + + #[test] + fn test_version_features_validation_reader_v1_with_features() { + // Invalid: Reader features with version 1 + let result = Protocol::try_new( + 1, + 7, + Some([TableFeature::DeletionVectors]), + Some([TableFeature::DeletionVectors]), + ); + assert!(result.is_err()); + match result { + Err(Error::InvalidProtocol(msg)) + if msg.contains( + "Reader features must not be present when minimum reader version != 3", + ) => {} + _ => panic!("Expected InvalidProtocol error for reader features with version 1"), + } + } + + #[test] + fn test_version_features_validation_reader_v2_with_features() { + // Invalid: Reader features with version 2 + let result = Protocol::try_new( + 2, + 7, + Some([TableFeature::DeletionVectors]), + Some([TableFeature::DeletionVectors]), + ); + assert!(result.is_err()); + match result { + Err(Error::InvalidProtocol(msg)) + if msg.contains( + "Reader features must not be present when minimum reader version != 3", + ) => {} + _ => panic!("Expected InvalidProtocol error for reader features with version 2"), + } + } + + #[test] + fn test_version_features_validation_writer_v1_with_features() { + // Invalid: Writer features with version 1 + let result = Protocol::try_new( + 3, + 1, + Some([TableFeature::DeletionVectors]), + Some([TableFeature::DeletionVectors]), + ); + assert!(result.is_err()); + match result { + Err(Error::InvalidProtocol(msg)) + if msg.contains( + "Writer features must not be present when minimum writer version != 7", + ) => {} + _ => panic!("Expected InvalidProtocol error for writer features with version 1"), + } + } + + #[test] + fn test_version_features_validation_writer_v2_with_features() { + // Invalid: Writer features with version 2 + let result = Protocol::try_new( + 3, + 2, + Some([TableFeature::DeletionVectors]), + Some([TableFeature::DeletionVectors]), + ); + assert!(result.is_err()); + match result { + Err(Error::InvalidProtocol(msg)) + if msg.contains( + "Writer features must not be present when minimum writer version != 7", + ) => {} + _ => panic!("Expected InvalidProtocol error for writer features with version 2"), + } + } + + #[test] + fn test_version_features_validation_writer_v6_with_features() { + // Invalid: Writer features with version 6 (or any version != 7) + let result = Protocol::try_new( + 3, + 6, + Some([TableFeature::DeletionVectors]), + Some([TableFeature::DeletionVectors]), + ); + assert!(result.is_err()); + match result { + Err(Error::InvalidProtocol(msg)) + if msg.contains( + "Writer features must not be present when minimum writer version != 7", + ) => {} + _ => panic!("Expected InvalidProtocol error for writer features with version 6"), + } + } + #[test] fn test_v2_checkpoint_supported() { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeature::V2Checkpoint]), - Some([ReaderFeature::V2Checkpoint]), + Some([TableFeature::V2Checkpoint]), + Some([TableFeature::V2Checkpoint]), ) .unwrap(); assert!(protocol.ensure_read_supported().is_ok()); @@ -1309,21 +1653,11 @@ mod tests { }; assert!(protocol.ensure_read_supported().is_ok()); - let empty_features: [String; 0] = []; - let protocol = Protocol::try_new( - 3, - 7, - Some([ReaderFeature::V2Checkpoint]), - Some(&empty_features), - ) - .unwrap(); - assert!(protocol.ensure_read_supported().is_ok()); - let protocol = Protocol::try_new( 3, 7, - Some(&empty_features), - Some([WriterFeature::V2Checkpoint]), + Some([TableFeature::V2Checkpoint]), + Some([TableFeature::V2Checkpoint]), ) .unwrap(); assert!(protocol.ensure_read_supported().is_ok()); @@ -1331,8 +1665,8 @@ mod tests { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeature::V2Checkpoint]), - Some([WriterFeature::V2Checkpoint]), + Some([TableFeature::V2Checkpoint]), + Some([TableFeature::V2Checkpoint]), ) .unwrap(); assert!(protocol.ensure_read_supported().is_ok()); @@ -1361,41 +1695,39 @@ mod tests { 7, Some::>(vec![]), Some(vec![ - WriterFeature::AppendOnly, - WriterFeature::DeletionVectors, - WriterFeature::DomainMetadata, - WriterFeature::Invariants, - WriterFeature::RowTracking, + TableFeature::AppendOnly, + TableFeature::DomainMetadata, + TableFeature::Invariants, + TableFeature::RowTracking, ]), ) .unwrap(); assert!(protocol.ensure_write_supported().is_ok()); // Verify that unsupported writer features are rejected - // NOTE: Unsupported reader features should not cause an error here let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeature::Unknown("unsupported reader".to_string())]), - Some([WriterFeature::IdentityColumns]), + Some::>(vec![]), + Some([TableFeature::IdentityColumns]), ) .unwrap(); assert_result_error_with_message( protocol.ensure_write_supported(), - r#"Unsupported: Unknown WriterFeatures: "identityColumns". Supported WriterFeatures: "appendOnly", "deletionVectors", "domainMetadata", "inCommitTimestamp", "invariants", "rowTracking", "timestampNtz", "variantType", "variantType-preview", "variantShredding-preview""#, + r#"Unsupported: Unknown TableFeatures: "identityColumns". Supported TableFeatures: "appendOnly", "columnMapping", "deletionVectors", "domainMetadata", "inCommitTimestamp", "invariants", "rowTracking", "timestampNtz", "variantType", "variantType-preview", "variantShredding-preview""#, ); // Unknown writer features should cause an error let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeature::Unknown("unsupported reader".to_string())]), - Some([WriterFeature::Unknown("unsupported writer".to_string())]), + Some::>(vec![]), + Some([TableFeature::Unknown("unsupported writer".to_string())]), ) .unwrap(); assert_result_error_with_message( protocol.ensure_write_supported(), - r#"Unsupported: Unknown WriterFeatures: "unsupported writer". Supported WriterFeatures: "appendOnly", "deletionVectors", "domainMetadata", "inCommitTimestamp", "invariants", "rowTracking", "timestampNtz", "variantType", "variantType-preview", "variantShredding-preview""#, + r#"Unsupported: Unknown TableFeatures: "unsupported writer". Supported TableFeatures: "appendOnly", "columnMapping", "deletionVectors", "domainMetadata", "inCommitTimestamp", "invariants", "rowTracking", "timestampNtz", "variantType", "variantType-preview", "variantShredding-preview""#, ); } @@ -1407,7 +1739,7 @@ mod tests { Some::>(vec![]), Some(vec![ // No domain metadata even though that is required - WriterFeature::RowTracking, + TableFeature::RowTracking, ]), ) .unwrap(); @@ -1420,16 +1752,16 @@ mod tests { #[test] fn test_ensure_supported_features() { - let supported_features = [ReaderFeature::ColumnMapping, ReaderFeature::DeletionVectors]; - let table_features = vec![ReaderFeature::ColumnMapping]; + let supported_features = [TableFeature::ColumnMapping, TableFeature::DeletionVectors]; + let table_features = vec![TableFeature::ColumnMapping]; ensure_supported_features(&table_features, &supported_features).unwrap(); // test unknown features - let table_features = vec![ReaderFeature::ColumnMapping, ReaderFeature::unknown("idk")]; + let table_features = vec![TableFeature::ColumnMapping, TableFeature::unknown("idk")]; let error = ensure_supported_features(&table_features, &supported_features).unwrap_err(); match error { Error::Unsupported(e) if e == - "Unknown ReaderFeatures: \"idk\". Supported ReaderFeatures: \"columnMapping\", \"deletionVectors\"" + "Unknown TableFeatures: \"idk\". Supported TableFeatures: \"columnMapping\", \"deletionVectors\"" => {}, _ => panic!("Expected unsupported error, got: {error}"), } @@ -1439,16 +1771,16 @@ mod tests { fn test_parse_table_feature_never_fails() { // parse a non-str let features = Some([5]); - let expected = Some(vec![ReaderFeature::unknown("5")]); - assert_eq!(parse_features::(features), expected); + let expected = Some(vec![TableFeature::unknown("5")]); + assert_eq!(parse_features::(features), expected); // weird strs let features = Some(["", "absurD_)(+13%^⚙️"]); let expected = Some(vec![ - ReaderFeature::unknown(""), - ReaderFeature::unknown("absurD_)(+13%^⚙️"), + TableFeature::unknown(""), + TableFeature::unknown("absurD_)(+13%^⚙️"), ]); - assert_eq!(parse_features::(features), expected); + assert_eq!(parse_features::(features), expected); } #[test] @@ -1456,16 +1788,16 @@ mod tests { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeature::CatalogManaged]), - Some([WriterFeature::CatalogManaged]), + Some([TableFeature::CatalogManaged]), + Some([TableFeature::CatalogManaged]), ) .unwrap(); assert!(protocol.ensure_write_supported().is_err()); let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeature::CatalogOwnedPreview]), - Some([WriterFeature::CatalogOwnedPreview]), + Some([TableFeature::CatalogOwnedPreview]), + Some([TableFeature::CatalogOwnedPreview]), ) .unwrap(); assert!(protocol.ensure_write_supported().is_err()); @@ -1809,8 +2141,8 @@ mod tests { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeature::ColumnMapping]), - Some([WriterFeature::DeletionVectors]), + Some([TableFeature::DeletionVectors]), + Some([TableFeature::DeletionVectors]), ) .unwrap(); @@ -1843,7 +2175,7 @@ mod tests { let string_builder = StringBuilder::new(); let mut list_builder = ListBuilder::new(string_builder).with_field(list_field.clone()); - list_builder.values().append_value("columnMapping"); + list_builder.values().append_value("deletionVectors"); list_builder.append(true); let reader_features_array = list_builder.finish(); diff --git a/kernel/src/actions/visitors.rs b/kernel/src/actions/visitors.rs index 63d4e284d..32ae1530d 100644 --- a/kernel/src/actions/visitors.rs +++ b/kernel/src/actions/visitors.rs @@ -686,7 +686,7 @@ mod tests { use crate::engine::sync::SyncEngine; use crate::expressions::{column_expr_ref, Expression}; - use crate::table_features::{ReaderFeature, WriterFeature}; + use crate::table_features::TableFeature; use crate::utils::test_utils::{action_batch, parse_json_batch}; use crate::Engine; @@ -697,8 +697,8 @@ mod tests { let expected = Protocol { min_reader_version: 3, min_writer_version: 7, - reader_features: Some(vec![ReaderFeature::DeletionVectors]), - writer_features: Some(vec![WriterFeature::DeletionVectors]), + reader_features: Some(vec![TableFeature::DeletionVectors]), + writer_features: Some(vec![TableFeature::DeletionVectors]), }; assert_eq!(parsed, expected); Ok(()) diff --git a/kernel/src/engine/arrow_data.rs b/kernel/src/engine/arrow_data.rs index b6a48e581..a45b6109d 100644 --- a/kernel/src/engine/arrow_data.rs +++ b/kernel/src/engine/arrow_data.rs @@ -352,7 +352,7 @@ mod tests { use crate::engine::sync::SyncEngine; use crate::expressions::ArrayData; use crate::schema::{ArrayType, DataType, StructField, StructType}; - use crate::table_features::{ReaderFeature, WriterFeature}; + use crate::table_features::TableFeature; use crate::utils::test_utils::{assert_result_error_with_message, string_array_to_engine_data}; use crate::{DeltaResult, Engine as _, EngineData as _}; @@ -394,11 +394,11 @@ mod tests { assert_eq!(protocol.min_writer_version(), 7); assert_eq!( protocol.reader_features(), - Some([ReaderFeature::unknown("rw1")].as_slice()) + Some([TableFeature::unknown("rw1")].as_slice()) ); assert_eq!( protocol.writer_features(), - Some([WriterFeature::unknown("rw1"), WriterFeature::unknown("w2")].as_slice()) + Some([TableFeature::unknown("rw1"), TableFeature::unknown("w2")].as_slice()) ); Ok(()) } diff --git a/kernel/src/schema/variant_utils.rs b/kernel/src/schema/variant_utils.rs index c9bb88bd8..a1ec9aa66 100644 --- a/kernel/src/schema/variant_utils.rs +++ b/kernel/src/schema/variant_utils.rs @@ -2,7 +2,7 @@ use crate::actions::Protocol; use crate::schema::{Schema, SchemaTransform, StructType}; -use crate::table_features::{ReaderFeature, WriterFeature}; +use crate::table_features::TableFeature; use crate::utils::require; use crate::{DeltaResult, Error}; use std::borrow::Cow; @@ -24,10 +24,8 @@ pub(crate) fn validate_variant_type_feature_support( ) -> DeltaResult<()> { // Both the reader and writer need to have either the VariantType or the VariantTypePreview // features. - if (!protocol.has_reader_feature(&ReaderFeature::VariantType) - && !protocol.has_reader_feature(&ReaderFeature::VariantTypePreview)) - || (!protocol.has_writer_feature(&WriterFeature::VariantType) - && !protocol.has_writer_feature(&WriterFeature::VariantTypePreview)) + if !protocol.has_table_feature(&TableFeature::VariantType) + && !protocol.has_table_feature(&TableFeature::VariantTypePreview) { let mut uses_variant = UsesVariant::default(); let _ = uses_variant.transform_struct(schema); @@ -46,7 +44,7 @@ mod tests { use super::*; use crate::actions::Protocol; use crate::schema::{DataType, StructField, StructType}; - use crate::table_features::{ReaderFeature, WriterFeature}; + use crate::table_features::TableFeature; use crate::utils::test_utils::assert_result_error_with_message; #[test] @@ -74,10 +72,10 @@ mod tests { #[test] fn test_variant_feature_validation() { let features = [ - (ReaderFeature::VariantType, WriterFeature::VariantType), + (TableFeature::VariantType, TableFeature::VariantType), ( - ReaderFeature::VariantTypePreview, - WriterFeature::VariantTypePreview, + TableFeature::VariantTypePreview, + TableFeature::VariantTypePreview, ), ]; let schema_with_variant = StructType::new_unchecked([ @@ -120,16 +118,6 @@ mod tests { ) .unwrap(); - // Protocol without variantType writer feature - let protocol_without_writer_feature = - Protocol::try_new(3, 7, Some([variant_reader]), Some::>(vec![])) - .unwrap(); - - // Protocol without variantType reader feature - let protocol_without_reader_feature = - Protocol::try_new(3, 7, Some::>(vec![]), Some([variant_writer])) - .unwrap(); - // Schema with VARIANT + Protocol with features = OK validate_variant_type_feature_support( &schema_with_variant, @@ -163,20 +151,6 @@ mod tests { &protocol_without_features, ); assert_result_error_with_message(result, "Unsupported: Table contains VARIANT columns but does not have the required 'variantType' feature in reader and writer features"); - - // Schema with VARIANT + Protocol without writer feature = ERROR - let result = validate_variant_type_feature_support( - &schema_with_variant, - &protocol_without_writer_feature, - ); - assert_result_error_with_message(result, "Unsupported: Table contains VARIANT columns but does not have the required 'variantType' feature in reader and writer features"); - - // Schema with VARIANT + Protocol without reader feature = ERROR - let result = validate_variant_type_feature_support( - &schema_with_variant, - &protocol_without_reader_feature, - ); - assert_result_error_with_message(result, "Unsupported: Table contains VARIANT columns but does not have the required 'variantType' feature in reader and writer features"); }); } } diff --git a/kernel/src/snapshot.rs b/kernel/src/snapshot.rs index 9665a7497..4e9162ab1 100644 --- a/kernel/src/snapshot.rs +++ b/kernel/src/snapshot.rs @@ -463,20 +463,18 @@ mod tests { let reader_version = min_reader_version.unwrap_or(1); if ict_enabled { - let mut protocol = json!({ + // When ICT is enabled, we need minReaderVersion >= 3 and minWriterVersion = 7 + // inCommitTimestamp is a Writer-only feature, so it only goes in writerFeatures + // When using table features (writer version 7), reader version must be at least 3 + let reader_version = reader_version.max(3); + json!({ "protocol": { "minReaderVersion": reader_version, "minWriterVersion": 7, + "readerFeatures": [], "writerFeatures": ["inCommitTimestamp"] } - }); - - // Only include readerFeatures if minReaderVersion >= 3 - if reader_version >= 3 { - protocol["protocol"]["readerFeatures"] = json!([]); - } - - protocol + }) } else { json!({ "protocol": { diff --git a/kernel/src/table_changes/log_replay/tests.rs b/kernel/src/table_changes/log_replay/tests.rs index f1278504a..4c2d6a23f 100644 --- a/kernel/src/table_changes/log_replay/tests.rs +++ b/kernel/src/table_changes/log_replay/tests.rs @@ -10,7 +10,7 @@ use crate::scan::state::DvInfo; use crate::scan::PhysicalPredicate; use crate::schema::{DataType, StructField, StructType}; use crate::table_changes::log_replay::LogReplayScanner; -use crate::table_features::ReaderFeature; +use crate::table_features::TableFeature; use crate::utils::test_utils::{assert_result_error_with_message, Action, LocalMockTable}; use crate::Predicate; use crate::{DeltaResult, Engine, Error, Version}; @@ -79,8 +79,8 @@ async fn metadata_protocol() { Protocol::try_new( 3, 7, - Some([ReaderFeature::DeletionVectors]), - Some([ReaderFeature::ColumnMapping]), + Some([TableFeature::DeletionVectors]), + Some([TableFeature::DeletionVectors]), ) .unwrap(), ), @@ -138,8 +138,8 @@ async fn unsupported_reader_feature() { Protocol::try_new( 3, 7, - Some([ReaderFeature::DeletionVectors, ReaderFeature::ColumnMapping]), - Some([""; 0]), + Some([TableFeature::DeletionVectors, TableFeature::ColumnMapping]), + Some([TableFeature::DeletionVectors, TableFeature::ColumnMapping]), ) .unwrap(), )]) @@ -566,7 +566,7 @@ async fn failing_protocol() { let protocol = Protocol::try_new( 3, - 1, + 7, ["fake_feature".to_string()].into(), ["fake_feature".to_string()].into(), ) diff --git a/kernel/src/table_changes/mod.rs b/kernel/src/table_changes/mod.rs index c5898e0eb..3513bd6ec 100644 --- a/kernel/src/table_changes/mod.rs +++ b/kernel/src/table_changes/mod.rs @@ -41,7 +41,7 @@ use crate::log_segment::LogSegment; use crate::path::AsUrl; use crate::schema::{DataType, Schema, StructField, StructType}; use crate::snapshot::{Snapshot, SnapshotRef}; -use crate::table_features::{ColumnMappingMode, ReaderFeature}; +use crate::table_features::{ColumnMappingMode, TableFeature}; use crate::table_properties::TableProperties; use crate::utils::require; use crate::{DeltaResult, Engine, Error, Version}; @@ -279,8 +279,8 @@ fn check_cdf_table_properties(table_properties: &TableProperties) -> DeltaResult /// Ensures that Change Data Feed is supported for a table with this [`Protocol`] . /// See the documentation of [`TableChanges`] for more details. fn ensure_cdf_read_supported(protocol: &Protocol) -> DeltaResult<()> { - static CDF_SUPPORTED_READER_FEATURES: LazyLock> = - LazyLock::new(|| vec![ReaderFeature::DeletionVectors]); + static CDF_SUPPORTED_READER_FEATURES: LazyLock> = + LazyLock::new(|| vec![TableFeature::DeletionVectors]); match &protocol.reader_features() { // if min_reader_version = 3 and all reader features are subset of supported => OK Some(reader_features) if protocol.min_reader_version() == 3 => { diff --git a/kernel/src/table_configuration.rs b/kernel/src/table_configuration.rs index b5586d0e4..d50b9aaa3 100644 --- a/kernel/src/table_configuration.rs +++ b/kernel/src/table_configuration.rs @@ -17,7 +17,7 @@ use crate::schema::variant_utils::validate_variant_type_feature_support; use crate::schema::{InvariantChecker, SchemaRef}; use crate::table_features::{ column_mapping_mode, validate_schema_column_mapping, validate_timestamp_ntz_feature_support, - ColumnMappingMode, ReaderFeature, WriterFeature, + ColumnMappingMode, TableFeature, }; use crate::table_properties::TableProperties; use crate::{DeltaResult, Error, Version}; @@ -208,8 +208,8 @@ impl TableConfiguration { /// [`TableChanges`]: crate::table_changes::TableChanges #[internal_api] pub(crate) fn is_cdf_read_supported(&self) -> bool { - static CDF_SUPPORTED_READER_FEATURES: LazyLock> = - LazyLock::new(|| vec![ReaderFeature::DeletionVectors]); + static CDF_SUPPORTED_READER_FEATURES: LazyLock> = + LazyLock::new(|| vec![TableFeature::DeletionVectors]); let protocol_supported = match self.protocol.reader_features() { // if min_reader_version = 3 and all reader features are subset of supported => OK Some(reader_features) if self.protocol.min_reader_version() == 3 => { @@ -239,15 +239,10 @@ impl TableConfiguration { #[internal_api] #[allow(unused)] // needed to compile w/o default features pub(crate) fn is_deletion_vector_supported(&self) -> bool { - let read_supported = self - .protocol() - .has_reader_feature(&ReaderFeature::DeletionVectors) - && self.protocol.min_reader_version() == 3; - let write_supported = self - .protocol() - .has_writer_feature(&WriterFeature::DeletionVectors) - && self.protocol.min_writer_version() == 7; - read_supported && write_supported + self.protocol() + .has_table_feature(&TableFeature::DeletionVectors) + && self.protocol.min_reader_version() == 3 + && self.protocol.min_writer_version() == 7 } /// Returns `true` if writing deletion vectors is enabled for this table. This is the case @@ -267,12 +262,12 @@ impl TableConfiguration { /// Returns `true` if the table supports the appendOnly table feature. To support this feature: /// - The table must have a writer version between 2 and 7 (inclusive) - /// - If the table is on writer version 7, it must have the [`WriterFeature::AppendOnly`] + /// - If the table is on writer version 7, it must have the [`TableFeature::AppendOnly`] /// writer feature. pub(crate) fn is_append_only_supported(&self) -> bool { let protocol = &self.protocol; match protocol.min_writer_version() { - 7 if protocol.has_writer_feature(&WriterFeature::AppendOnly) => true, + 7 if protocol.has_table_feature(&TableFeature::AppendOnly) => true, version => (2..=6).contains(&version), } } @@ -286,7 +281,7 @@ impl TableConfiguration { pub(crate) fn is_invariants_supported(&self) -> bool { let protocol = &self.protocol; match protocol.min_writer_version() { - 7 if protocol.has_writer_feature(&WriterFeature::Invariants) => true, + 7 if protocol.has_table_feature(&TableFeature::Invariants) => true, version => (2..=6).contains(&version), } } @@ -297,26 +292,21 @@ impl TableConfiguration { /// /// See: pub(crate) fn is_v2_checkpoint_write_supported(&self) -> bool { - let read_supported = self - .protocol() - .has_reader_feature(&ReaderFeature::V2Checkpoint); - let write_supported = self - .protocol() - .has_writer_feature(&WriterFeature::V2Checkpoint); - read_supported && write_supported + self.protocol() + .has_table_feature(&TableFeature::V2Checkpoint) } /// Returns `true` if the table supports writing in-commit timestamps. /// /// To support this feature the table must: /// - Have a min_writer_version of 7 - /// - Have the [`WriterFeature::InCommitTimestamp`] writer feature. + /// - Have the [`TableFeature::InCommitTimestamp`] writer feature. #[allow(unused)] pub(crate) fn is_in_commit_timestamps_supported(&self) -> bool { self.protocol().min_writer_version() == 7 && self .protocol() - .has_writer_feature(&WriterFeature::InCommitTimestamp) + .has_table_feature(&TableFeature::InCommitTimestamp) } /// Returns `true` if in-commit timestamps is supported and it is enabled. In-commit timestamps @@ -369,25 +359,25 @@ impl TableConfiguration { /// /// To support this feature the table must: /// - Have a min_writer_version of 7. - /// - Have the [`WriterFeature::DomainMetadata`] writer feature. + /// - Have the [`TableFeature::DomainMetadata`] writer feature. #[allow(unused)] pub(crate) fn is_domain_metadata_supported(&self) -> bool { self.protocol().min_writer_version() == 7 && self .protocol() - .has_writer_feature(&WriterFeature::DomainMetadata) + .has_table_feature(&TableFeature::DomainMetadata) } /// Returns `true` if the table supports writing row tracking metadata. /// /// To support this feature the table must: /// - Have a min_writer_version of 7. - /// - Have the [`WriterFeature::RowTracking`] writer feature. + /// - Have the [`TableFeature::RowTracking`] writer feature. pub(crate) fn is_row_tracking_supported(&self) -> bool { self.protocol().min_writer_version() == 7 && self .protocol() - .has_writer_feature(&WriterFeature::RowTracking) + .has_table_feature(&TableFeature::RowTracking) } /// Returns `true` if row tracking is enabled for this table. @@ -433,7 +423,7 @@ mod test { use crate::actions::{Metadata, Protocol}; use crate::schema::{DataType, StructField, StructType}; - use crate::table_features::{ReaderFeature, WriterFeature}; + use crate::table_features::TableFeature; use crate::table_properties::TableProperties; use crate::utils::test_utils::assert_result_error_with_message; use crate::Error; @@ -455,8 +445,8 @@ mod test { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeature::DeletionVectors]), - Some([WriterFeature::DeletionVectors]), + Some([TableFeature::DeletionVectors]), + Some([TableFeature::DeletionVectors]), ) .unwrap(); let table_root = Url::try_from("file:///").unwrap(); @@ -485,8 +475,8 @@ mod test { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeature::DeletionVectors]), - Some([WriterFeature::DeletionVectors]), + Some([TableFeature::DeletionVectors]), + Some([TableFeature::DeletionVectors]), ) .unwrap(); let table_root = Url::try_from("file:///").unwrap(); @@ -513,7 +503,7 @@ mod test { 3, 7, Some::>(vec![]), - Some([WriterFeature::InCommitTimestamp]), + Some([TableFeature::InCommitTimestamp]), ) .unwrap(); let table_root = Url::try_from("file:///").unwrap(); @@ -557,7 +547,7 @@ mod test { 3, 7, Some::>(vec![]), - Some([WriterFeature::InCommitTimestamp]), + Some([TableFeature::InCommitTimestamp]), ) .unwrap(); let table_root = Url::try_from("file:///").unwrap(); @@ -598,7 +588,7 @@ mod test { 3, 7, Some::>(vec![]), - Some([WriterFeature::InCommitTimestamp]), + Some([TableFeature::InCommitTimestamp]), ) .unwrap(); let table_root = Url::try_from("file:///").unwrap(); @@ -618,7 +608,7 @@ mod test { 3, 7, Some::>(vec![]), - Some([WriterFeature::InCommitTimestamp]), + Some([TableFeature::InCommitTimestamp]), ) .unwrap(); let table_root = Url::try_from("file:///").unwrap(); @@ -652,8 +642,8 @@ mod test { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeature::TimestampWithoutTimezone]), - Some([WriterFeature::TimestampWithoutTimezone]), + Some([TableFeature::TimestampWithoutTimezone]), + Some([TableFeature::TimestampWithoutTimezone]), ) .unwrap(); let table_root = Url::try_from("file:///").unwrap(); @@ -677,8 +667,8 @@ mod test { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeature::DeletionVectors]), - Some([WriterFeature::DeletionVectors]), + Some([TableFeature::DeletionVectors]), + Some([TableFeature::DeletionVectors]), ) .unwrap(); let table_root = Url::try_from("file:///").unwrap(); @@ -707,11 +697,11 @@ mod test { let new_protocol = Protocol::try_new( 3, 7, - Some([ReaderFeature::DeletionVectors, ReaderFeature::V2Checkpoint]), + Some([TableFeature::DeletionVectors, TableFeature::V2Checkpoint]), Some([ - WriterFeature::DeletionVectors, - WriterFeature::V2Checkpoint, - WriterFeature::AppendOnly, + TableFeature::DeletionVectors, + TableFeature::V2Checkpoint, + TableFeature::AppendOnly, ]), ) .unwrap(); @@ -761,8 +751,8 @@ mod test { let protocol_with_timestamp_ntz_features = Protocol::try_new( 3, 7, - Some([ReaderFeature::TimestampWithoutTimezone]), - Some([WriterFeature::TimestampWithoutTimezone]), + Some([TableFeature::TimestampWithoutTimezone]), + Some([TableFeature::TimestampWithoutTimezone]), ) .unwrap(); @@ -806,8 +796,8 @@ mod test { let protocol_with_variant_features = Protocol::try_new( 3, 7, - Some([ReaderFeature::VariantType]), - Some([WriterFeature::VariantType]), + Some([TableFeature::VariantType]), + Some([TableFeature::VariantType]), ) .unwrap(); diff --git a/kernel/src/table_features/column_mapping.rs b/kernel/src/table_features/column_mapping.rs index cf92d6952..2313a5dfc 100644 --- a/kernel/src/table_features/column_mapping.rs +++ b/kernel/src/table_features/column_mapping.rs @@ -1,5 +1,5 @@ //! Code to handle column mapping, including modes and schema transforms -use super::ReaderFeature; +use super::TableFeature; use crate::actions::Protocol; use crate::schema::{ ColumnName, DataType, MetadataValue, Schema, SchemaTransform, StructField, StructType, @@ -38,7 +38,7 @@ pub(crate) fn column_mapping_mode( // (but should be ignored) even when the feature is not supported. For details see // https://github.com/delta-io/delta/blob/master/PROTOCOL.md#column-mapping (Some(mode), 2) => mode, - (Some(mode), 3) if protocol.has_reader_feature(&ReaderFeature::ColumnMapping) => mode, + (Some(mode), 3) if protocol.has_table_feature(&TableFeature::ColumnMapping) => mode, _ => ColumnMappingMode::None, } } @@ -204,8 +204,8 @@ mod tests { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeature::ColumnMapping]), - empty_features.clone(), + Some([TableFeature::ColumnMapping]), + Some([TableFeature::ColumnMapping]), ) .unwrap(); @@ -222,8 +222,8 @@ mod tests { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeature::DeletionVectors]), - empty_features.clone(), + Some([TableFeature::DeletionVectors]), + Some([TableFeature::DeletionVectors]), ) .unwrap(); @@ -240,8 +240,8 @@ mod tests { let protocol = Protocol::try_new( 3, 7, - Some([ReaderFeature::DeletionVectors, ReaderFeature::ColumnMapping]), - empty_features, + Some([TableFeature::DeletionVectors, TableFeature::ColumnMapping]), + Some([TableFeature::DeletionVectors, TableFeature::ColumnMapping]), ) .unwrap(); diff --git a/kernel/src/table_features/mod.rs b/kernel/src/table_features/mod.rs index 189eaf856..990b50828 100644 --- a/kernel/src/table_features/mod.rs +++ b/kernel/src/table_features/mod.rs @@ -1,11 +1,15 @@ +use std::collections::HashSet; use std::sync::LazyLock; use serde::{Deserialize, Serialize}; use strum::{AsRefStr, Display as StrumDisplay, EnumCount, EnumString}; +use crate::actions::Protocol; use crate::expressions::Scalar; use crate::schema::derive_macro_utils::ToDataType; use crate::schema::DataType; +use crate::table_properties::TableProperties; +use crate::{DeltaResult, Error}; use delta_kernel_derive::internal_api; pub(crate) use column_mapping::column_mapping_mode; @@ -14,12 +18,12 @@ pub(crate) use timestamp_ntz::validate_timestamp_ntz_feature_support; mod column_mapping; mod timestamp_ntz; -/// Reader features communicate capabilities that must be implemented in order to correctly read a -/// given table. That is, readers must implement and respect all features listed in a table's -/// `ReaderFeatures`. Note that any feature listed as a `ReaderFeature` must also have a -/// corresponding `WriterFeature`. +/// Table features communicate capabilities that must be implemented in order to correctly +/// read or write a given table. Features can be: +/// - Writer-only: Only affect write operations +/// - ReaderWriter: Affect both read and write operations (must appear in both feature lists) /// -/// The kernel currently supports all reader features except for V2Checkpoints. +/// The kernel currently supports most table features with some limitations. #[derive( Serialize, Deserialize, @@ -36,72 +40,8 @@ mod timestamp_ntz; #[strum(serialize_all = "camelCase")] #[serde(rename_all = "camelCase")] #[internal_api] -pub(crate) enum ReaderFeature { - /// CatalogManaged tables: - /// - CatalogManaged, - #[strum(serialize = "catalogOwned-preview")] - #[serde(rename = "catalogOwned-preview")] - CatalogOwnedPreview, - /// Mapping of one column to another - ColumnMapping, - /// Deletion vectors for merge, update, delete - DeletionVectors, - /// timestamps without timezone support - #[strum(serialize = "timestampNtz")] - #[serde(rename = "timestampNtz")] - TimestampWithoutTimezone, - // Allow columns to change type - TypeWidening, - #[strum(serialize = "typeWidening-preview")] - #[serde(rename = "typeWidening-preview")] - TypeWideningPreview, - /// version 2 of checkpointing - V2Checkpoint, - /// vacuumProtocolCheck ReaderWriter feature ensures consistent application of reader and writer - /// protocol checks during VACUUM operations - VacuumProtocolCheck, - /// This feature enables support for the variant data type, which stores semi-structured data. - VariantType, - #[strum(serialize = "variantType-preview")] - #[serde(rename = "variantType-preview")] - VariantTypePreview, - #[strum(serialize = "variantShredding-preview")] - #[serde(rename = "variantShredding-preview")] - VariantShreddingPreview, - #[serde(untagged)] - #[strum(default)] - Unknown(String), -} - -/// Similar to reader features, writer features communicate capabilities that must be implemented -/// in order to correctly write to a given table. That is, writers must implement and respect all -/// features listed in a table's `WriterFeatures`. -/// -/// Kernel write support is currently in progress and as such these are not supported. -#[derive( - Serialize, - Deserialize, - Debug, - Clone, - Eq, - PartialEq, - EnumString, - StrumDisplay, - AsRefStr, - EnumCount, - Hash, -)] -#[strum(serialize_all = "camelCase")] -#[serde(rename_all = "camelCase")] -#[internal_api] -pub(crate) enum WriterFeature { - /// CatalogManaged tables: - /// - CatalogManaged, - #[strum(serialize = "catalogOwned-preview")] - #[serde(rename = "catalogOwned-preview")] - CatalogOwnedPreview, +pub(crate) enum TableFeature { + // Writer-only features /// Append Only Tables AppendOnly, /// Table invariants @@ -112,16 +52,35 @@ pub(crate) enum WriterFeature { ChangeDataFeed, /// Columns with generated values GeneratedColumns, - /// Mapping of one column to another - ColumnMapping, /// ID Columns IdentityColumns, /// Monotonically increasing timestamps in the CommitInfo InCommitTimestamp, - /// Deletion vectors for merge, update, delete - DeletionVectors, /// Row tracking on tables RowTracking, + /// domain specific metadata + DomainMetadata, + /// Iceberg compatibility support + IcebergCompatV1, + /// Iceberg compatibility support + IcebergCompatV2, + /// The Clustered Table feature facilitates the physical clustering of rows + /// that share similar values on a predefined set of clustering columns. + #[strum(serialize = "clustering")] + #[serde(rename = "clustering")] + ClusteredTable, + + // ReaderWriter features + /// CatalogManaged tables: + /// + CatalogManaged, + #[strum(serialize = "catalogOwned-preview")] + #[serde(rename = "catalogOwned-preview")] + CatalogOwnedPreview, + /// Mapping of one column to another + ColumnMapping, + /// Deletion vectors for merge, update, delete + DeletionVectors, /// timestamps without timezone support #[strum(serialize = "timestampNtz")] #[serde(rename = "timestampNtz")] @@ -131,22 +90,11 @@ pub(crate) enum WriterFeature { #[strum(serialize = "typeWidening-preview")] #[serde(rename = "typeWidening-preview")] TypeWideningPreview, - /// domain specific metadata - DomainMetadata, /// version 2 of checkpointing V2Checkpoint, - /// Iceberg compatibility support - IcebergCompatV1, - /// Iceberg compatibility support - IcebergCompatV2, /// vacuumProtocolCheck ReaderWriter feature ensures consistent application of reader and writer /// protocol checks during VACUUM operations VacuumProtocolCheck, - /// The Clustered Table feature facilitates the physical clustering of rows - /// that share similar values on a predefined set of clustering columns. - #[strum(serialize = "clustering")] - #[serde(rename = "clustering")] - ClusteredTable, /// This feature enables support for the variant data type, which stores semi-structured data. VariantType, #[strum(serialize = "variantType-preview")] @@ -155,70 +103,448 @@ pub(crate) enum WriterFeature { #[strum(serialize = "variantShredding-preview")] #[serde(rename = "variantShredding-preview")] VariantShreddingPreview, + #[serde(untagged)] #[strum(default)] Unknown(String), } -impl ToDataType for ReaderFeature { - fn to_data_type() -> DataType { - DataType::STRING - } +/// Classifies table features by their type +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum FeatureType { + /// Feature only affects write operations + Writer, + /// Feature affects both read and write operations (must appear in both feature lists) + ReaderWriter, + /// Unknown feature type (for forward compatibility) + Unknown, } -impl ToDataType for WriterFeature { - fn to_data_type() -> DataType { - DataType::STRING - } +/// Defines how a feature's enablement is determined +#[derive(Debug, Clone, Copy)] +pub(crate) enum EnablementCheck { + /// Feature is enabled if it's supported (appears in protocol feature lists) + AlwaysIfSupported, + /// Feature is enabled if supported AND the provided function returns true when checking table properties + EnabledIf(fn(&TableProperties) -> bool), } -impl From for Scalar { - fn from(feature: ReaderFeature) -> Self { - Scalar::String(feature.to_string()) +/// Rich metadata about a table feature including version requirements, dependencies, and support status +#[derive(Debug, Clone)] +pub(crate) struct FeatureInfo { + /// The feature's canonical name as it appears in the protocol + pub name: &'static str, + /// Minimum reader protocol version required for this feature + pub min_reader_version: i32, + /// Minimum writer protocol version required for this feature + pub min_writer_version: i32, + /// The type of feature (Writer, ReaderWriter, or Unknown) + pub feature_type: FeatureType, + /// Other features that must be present for this feature to work + pub required_features: &'static [TableFeature], + /// Whether the Rust kernel has read support for this feature + pub has_read_support: bool, + /// Whether the Rust kernel has write support for this feature + pub has_write_support: bool, + /// How to check if this feature is enabled in a table + pub enablement_check: EnablementCheck, +} + +// Static FeatureInfo instances for each table feature +static APPEND_ONLY_INFO: FeatureInfo = FeatureInfo { + name: "appendOnly", + min_reader_version: 1, + min_writer_version: 2, + feature_type: FeatureType::Writer, + required_features: &[], + has_read_support: false, + has_write_support: true, + enablement_check: EnablementCheck::EnabledIf(|props| props.append_only == Some(true)), +}; + +static INVARIANTS_INFO: FeatureInfo = FeatureInfo { + name: "invariants", + min_reader_version: 1, + min_writer_version: 2, + feature_type: FeatureType::Writer, + required_features: &[], + has_read_support: false, + has_write_support: true, + enablement_check: EnablementCheck::AlwaysIfSupported, +}; + +static CHECK_CONSTRAINTS_INFO: FeatureInfo = FeatureInfo { + name: "checkConstraints", + min_reader_version: 1, + min_writer_version: 3, + feature_type: FeatureType::Writer, + required_features: &[], + has_read_support: false, + has_write_support: false, + enablement_check: EnablementCheck::AlwaysIfSupported, +}; + +static CHANGE_DATA_FEED_INFO: FeatureInfo = FeatureInfo { + name: "changeDataFeed", + min_reader_version: 1, + min_writer_version: 4, + feature_type: FeatureType::Writer, + required_features: &[], + has_read_support: false, + has_write_support: false, + enablement_check: EnablementCheck::EnabledIf(|props| { + props.enable_change_data_feed == Some(true) + }), +}; + +static GENERATED_COLUMNS_INFO: FeatureInfo = FeatureInfo { + name: "generatedColumns", + min_reader_version: 1, + min_writer_version: 4, + feature_type: FeatureType::Writer, + required_features: &[], + has_read_support: false, + has_write_support: false, + enablement_check: EnablementCheck::AlwaysIfSupported, +}; + +static IDENTITY_COLUMNS_INFO: FeatureInfo = FeatureInfo { + name: "identityColumns", + min_reader_version: 1, + min_writer_version: 6, + feature_type: FeatureType::Writer, + required_features: &[], + has_read_support: false, + has_write_support: false, + enablement_check: EnablementCheck::AlwaysIfSupported, +}; + +static IN_COMMIT_TIMESTAMP_INFO: FeatureInfo = FeatureInfo { + name: "inCommitTimestamp", + min_reader_version: 3, + min_writer_version: 7, + feature_type: FeatureType::Writer, + required_features: &[], + has_read_support: false, + has_write_support: true, + enablement_check: EnablementCheck::EnabledIf(|props| { + props.enable_in_commit_timestamps == Some(true) + }), +}; + +static ROW_TRACKING_INFO: FeatureInfo = FeatureInfo { + name: "rowTracking", + min_reader_version: 3, + min_writer_version: 7, + feature_type: FeatureType::Writer, + required_features: &[TableFeature::DomainMetadata], + has_read_support: false, + has_write_support: true, + enablement_check: EnablementCheck::EnabledIf(|props| props.enable_row_tracking == Some(true)), +}; + +static DOMAIN_METADATA_INFO: FeatureInfo = FeatureInfo { + name: "domainMetadata", + min_reader_version: 3, + min_writer_version: 7, + feature_type: FeatureType::Writer, + required_features: &[], + has_read_support: false, + has_write_support: true, + enablement_check: EnablementCheck::AlwaysIfSupported, +}; + +static ICEBERG_COMPAT_V1_INFO: FeatureInfo = FeatureInfo { + name: "icebergCompatV1", + min_reader_version: 1, + min_writer_version: 7, + feature_type: FeatureType::Writer, + required_features: &[], + has_read_support: false, + has_write_support: false, + enablement_check: EnablementCheck::AlwaysIfSupported, +}; + +static ICEBERG_COMPAT_V2_INFO: FeatureInfo = FeatureInfo { + name: "icebergCompatV2", + min_reader_version: 2, + min_writer_version: 7, + feature_type: FeatureType::Writer, + required_features: &[TableFeature::ColumnMapping], + has_read_support: false, + has_write_support: false, + enablement_check: EnablementCheck::AlwaysIfSupported, +}; + +static CLUSTERED_TABLE_INFO: FeatureInfo = FeatureInfo { + name: "clustering", + min_reader_version: 1, + min_writer_version: 7, + feature_type: FeatureType::Writer, + required_features: &[], + has_read_support: false, + has_write_support: false, + enablement_check: EnablementCheck::AlwaysIfSupported, +}; + +static CATALOG_MANAGED_INFO: FeatureInfo = FeatureInfo { + name: "catalogManaged", + min_reader_version: 3, + min_writer_version: 7, + feature_type: FeatureType::ReaderWriter, + required_features: &[], + #[cfg(feature = "catalog-managed")] + has_read_support: true, + #[cfg(not(feature = "catalog-managed"))] + has_read_support: false, + has_write_support: false, + enablement_check: EnablementCheck::AlwaysIfSupported, +}; + +static CATALOG_OWNED_PREVIEW_INFO: FeatureInfo = FeatureInfo { + name: "catalogOwned-preview", + min_reader_version: 3, + min_writer_version: 7, + feature_type: FeatureType::ReaderWriter, + required_features: &[], + #[cfg(feature = "catalog-managed")] + has_read_support: true, + #[cfg(not(feature = "catalog-managed"))] + has_read_support: false, + has_write_support: false, + enablement_check: EnablementCheck::AlwaysIfSupported, +}; + +static COLUMN_MAPPING_INFO: FeatureInfo = FeatureInfo { + name: "columnMapping", + min_reader_version: 2, + min_writer_version: 5, + feature_type: FeatureType::ReaderWriter, + required_features: &[], + has_read_support: true, + has_write_support: false, + enablement_check: EnablementCheck::EnabledIf(|props| { + props.column_mapping_mode.is_some() + && props.column_mapping_mode != Some(ColumnMappingMode::None) + }), +}; + +static DELETION_VECTORS_INFO: FeatureInfo = FeatureInfo { + name: "deletionVectors", + min_reader_version: 3, + min_writer_version: 7, + feature_type: FeatureType::ReaderWriter, + required_features: &[], + has_read_support: true, + has_write_support: true, + enablement_check: EnablementCheck::EnabledIf(|props| { + props.enable_deletion_vectors == Some(true) + }), +}; + +static TIMESTAMP_WITHOUT_TIMEZONE_INFO: FeatureInfo = FeatureInfo { + name: "timestampNtz", + min_reader_version: 3, + min_writer_version: 7, + feature_type: FeatureType::ReaderWriter, + required_features: &[], + has_read_support: true, + has_write_support: true, + enablement_check: EnablementCheck::AlwaysIfSupported, +}; + +static TYPE_WIDENING_INFO: FeatureInfo = FeatureInfo { + name: "typeWidening", + min_reader_version: 3, + min_writer_version: 7, + feature_type: FeatureType::ReaderWriter, + required_features: &[], + has_read_support: true, + has_write_support: false, + enablement_check: EnablementCheck::AlwaysIfSupported, +}; + +static TYPE_WIDENING_PREVIEW_INFO: FeatureInfo = FeatureInfo { + name: "typeWidening-preview", + min_reader_version: 3, + min_writer_version: 7, + feature_type: FeatureType::ReaderWriter, + required_features: &[], + has_read_support: true, + has_write_support: false, + enablement_check: EnablementCheck::AlwaysIfSupported, +}; + +static V2_CHECKPOINT_INFO: FeatureInfo = FeatureInfo { + name: "v2Checkpoint", + min_reader_version: 3, + min_writer_version: 7, + feature_type: FeatureType::ReaderWriter, + required_features: &[], + has_read_support: true, + has_write_support: false, + enablement_check: EnablementCheck::AlwaysIfSupported, +}; + +static VACUUM_PROTOCOL_CHECK_INFO: FeatureInfo = FeatureInfo { + name: "vacuumProtocolCheck", + min_reader_version: 3, + min_writer_version: 7, + feature_type: FeatureType::ReaderWriter, + required_features: &[], + has_read_support: true, + has_write_support: false, + enablement_check: EnablementCheck::AlwaysIfSupported, +}; + +static VARIANT_TYPE_INFO: FeatureInfo = FeatureInfo { + name: "variantType", + min_reader_version: 3, + min_writer_version: 7, + feature_type: FeatureType::ReaderWriter, + required_features: &[], + has_read_support: true, + has_write_support: true, + enablement_check: EnablementCheck::AlwaysIfSupported, +}; + +static VARIANT_TYPE_PREVIEW_INFO: FeatureInfo = FeatureInfo { + name: "variantType-preview", + min_reader_version: 3, + min_writer_version: 7, + feature_type: FeatureType::ReaderWriter, + required_features: &[], + has_read_support: true, + has_write_support: true, + enablement_check: EnablementCheck::AlwaysIfSupported, +}; + +static VARIANT_SHREDDING_PREVIEW_INFO: FeatureInfo = FeatureInfo { + name: "variantShredding-preview", + min_reader_version: 3, + min_writer_version: 7, + feature_type: FeatureType::ReaderWriter, + required_features: &[], + has_read_support: true, + has_write_support: true, + enablement_check: EnablementCheck::AlwaysIfSupported, +}; + +impl TableFeature { + /// Returns the feature type (Writer, ReaderWriter, or Unknown) + pub(crate) fn feature_type(&self) -> FeatureType { + match self { + // Writer-only features + TableFeature::AppendOnly => FeatureType::Writer, + TableFeature::Invariants => FeatureType::Writer, + TableFeature::CheckConstraints => FeatureType::Writer, + TableFeature::ChangeDataFeed => FeatureType::Writer, + TableFeature::GeneratedColumns => FeatureType::Writer, + TableFeature::IdentityColumns => FeatureType::Writer, + TableFeature::InCommitTimestamp => FeatureType::Writer, + TableFeature::RowTracking => FeatureType::Writer, + TableFeature::DomainMetadata => FeatureType::Writer, + TableFeature::IcebergCompatV1 => FeatureType::Writer, + TableFeature::IcebergCompatV2 => FeatureType::Writer, + TableFeature::ClusteredTable => FeatureType::Writer, + + // ReaderWriter features + TableFeature::CatalogManaged => FeatureType::ReaderWriter, + TableFeature::CatalogOwnedPreview => FeatureType::ReaderWriter, + TableFeature::ColumnMapping => FeatureType::ReaderWriter, + TableFeature::DeletionVectors => FeatureType::ReaderWriter, + TableFeature::TimestampWithoutTimezone => FeatureType::ReaderWriter, + TableFeature::TypeWidening => FeatureType::ReaderWriter, + TableFeature::TypeWideningPreview => FeatureType::ReaderWriter, + TableFeature::V2Checkpoint => FeatureType::ReaderWriter, + TableFeature::VacuumProtocolCheck => FeatureType::ReaderWriter, + TableFeature::VariantType => FeatureType::ReaderWriter, + TableFeature::VariantTypePreview => FeatureType::ReaderWriter, + TableFeature::VariantShreddingPreview => FeatureType::ReaderWriter, + + // Unknown features + TableFeature::Unknown(_) => FeatureType::Unknown, + } + } + + /// Returns rich metadata about this table feature including version requirements, + /// dependencies, and support status. For Unknown features, returns None. + pub(crate) fn info(&self) -> Option<&'static FeatureInfo> { + match self { + // Writer-only features + TableFeature::AppendOnly => Some(&APPEND_ONLY_INFO), + TableFeature::Invariants => Some(&INVARIANTS_INFO), + TableFeature::CheckConstraints => Some(&CHECK_CONSTRAINTS_INFO), + TableFeature::ChangeDataFeed => Some(&CHANGE_DATA_FEED_INFO), + TableFeature::GeneratedColumns => Some(&GENERATED_COLUMNS_INFO), + TableFeature::IdentityColumns => Some(&IDENTITY_COLUMNS_INFO), + TableFeature::InCommitTimestamp => Some(&IN_COMMIT_TIMESTAMP_INFO), + TableFeature::RowTracking => Some(&ROW_TRACKING_INFO), + TableFeature::DomainMetadata => Some(&DOMAIN_METADATA_INFO), + TableFeature::IcebergCompatV1 => Some(&ICEBERG_COMPAT_V1_INFO), + TableFeature::IcebergCompatV2 => Some(&ICEBERG_COMPAT_V2_INFO), + TableFeature::ClusteredTable => Some(&CLUSTERED_TABLE_INFO), + + // ReaderWriter features + TableFeature::CatalogManaged => Some(&CATALOG_MANAGED_INFO), + TableFeature::CatalogOwnedPreview => Some(&CATALOG_OWNED_PREVIEW_INFO), + TableFeature::ColumnMapping => Some(&COLUMN_MAPPING_INFO), + TableFeature::DeletionVectors => Some(&DELETION_VECTORS_INFO), + TableFeature::TimestampWithoutTimezone => Some(&TIMESTAMP_WITHOUT_TIMEZONE_INFO), + TableFeature::TypeWidening => Some(&TYPE_WIDENING_INFO), + TableFeature::TypeWideningPreview => Some(&TYPE_WIDENING_PREVIEW_INFO), + TableFeature::V2Checkpoint => Some(&V2_CHECKPOINT_INFO), + TableFeature::VacuumProtocolCheck => Some(&VACUUM_PROTOCOL_CHECK_INFO), + TableFeature::VariantType => Some(&VARIANT_TYPE_INFO), + TableFeature::VariantTypePreview => Some(&VARIANT_TYPE_PREVIEW_INFO), + TableFeature::VariantShreddingPreview => Some(&VARIANT_SHREDDING_PREVIEW_INFO), + + // Unknown features have no metadata + TableFeature::Unknown(_) => None, + } } } -impl From for Scalar { - fn from(feature: WriterFeature) -> Self { - Scalar::String(feature.to_string()) +impl ToDataType for TableFeature { + fn to_data_type() -> DataType { + DataType::STRING } } -#[cfg(test)] // currently only used in tests -impl ReaderFeature { - pub(crate) fn unknown(s: impl ToString) -> Self { - ReaderFeature::Unknown(s.to_string()) +impl From for Scalar { + fn from(feature: TableFeature) -> Self { + Scalar::String(feature.to_string()) } } #[cfg(test)] // currently only used in tests -impl WriterFeature { +impl TableFeature { pub(crate) fn unknown(s: impl ToString) -> Self { - WriterFeature::Unknown(s.to_string()) + TableFeature::Unknown(s.to_string()) } } -pub(crate) static SUPPORTED_READER_FEATURES: LazyLock> = LazyLock::new(|| { +pub(crate) static SUPPORTED_READER_FEATURES: LazyLock> = LazyLock::new(|| { vec![ #[cfg(feature = "catalog-managed")] - ReaderFeature::CatalogManaged, + TableFeature::CatalogManaged, #[cfg(feature = "catalog-managed")] - ReaderFeature::CatalogOwnedPreview, - ReaderFeature::ColumnMapping, - ReaderFeature::DeletionVectors, - ReaderFeature::TimestampWithoutTimezone, - ReaderFeature::TypeWidening, - ReaderFeature::TypeWideningPreview, - ReaderFeature::VacuumProtocolCheck, - ReaderFeature::V2Checkpoint, - ReaderFeature::VariantType, - ReaderFeature::VariantTypePreview, + TableFeature::CatalogOwnedPreview, + TableFeature::ColumnMapping, + TableFeature::DeletionVectors, + TableFeature::TimestampWithoutTimezone, + TableFeature::TypeWidening, + TableFeature::TypeWideningPreview, + TableFeature::VacuumProtocolCheck, + TableFeature::V2Checkpoint, + TableFeature::VariantType, + TableFeature::VariantTypePreview, // The default engine currently DOES NOT support shredded Variant reads and the parquet // reader will reject the read if it sees a shredded schema in the parquet file. That being // said, kernel does permit reconstructing shredded variants into the // `STRUCT` representation if parquet readers of // third-party engines support it. - ReaderFeature::VariantShreddingPreview, + TableFeature::VariantShreddingPreview, ] }); @@ -228,140 +554,535 @@ pub(crate) static SUPPORTED_READER_FEATURES: LazyLock> = Lazy /// - We only support DeletionVectors in that we never write them (no DML). /// - We support writing to existing tables with row tracking, but we don't support creating /// tables with row tracking yet. -pub(crate) static SUPPORTED_WRITER_FEATURES: LazyLock> = LazyLock::new(|| { +pub(crate) static SUPPORTED_WRITER_FEATURES: LazyLock> = LazyLock::new(|| { vec![ - WriterFeature::AppendOnly, - WriterFeature::DeletionVectors, - WriterFeature::DomainMetadata, - WriterFeature::InCommitTimestamp, - WriterFeature::Invariants, - WriterFeature::RowTracking, - WriterFeature::TimestampWithoutTimezone, - WriterFeature::VariantType, - WriterFeature::VariantTypePreview, - WriterFeature::VariantShreddingPreview, + TableFeature::AppendOnly, + TableFeature::ColumnMapping, + TableFeature::DeletionVectors, + TableFeature::DomainMetadata, + TableFeature::InCommitTimestamp, + TableFeature::Invariants, + TableFeature::RowTracking, + TableFeature::TimestampWithoutTimezone, + TableFeature::VariantType, + TableFeature::VariantTypePreview, + TableFeature::VariantShreddingPreview, ] }); +/// A unified interface for checking table feature support and enablement. +/// +/// This struct combines Protocol and TableProperties to provide a single source of truth for: +/// - Whether a feature is supported (present in protocol and meets version requirements) +/// - Whether a feature is enabled (supported AND table properties indicate it's active) +/// - Validation of read/write support +/// - Dependency validation between features +pub(crate) struct TableFeatures<'a> { + protocol: &'a Protocol, + properties: &'a TableProperties, +} + +impl<'a> TableFeatures<'a> { + /// Creates a new TableFeatures instance for checking feature support and enablement + pub(crate) fn new(protocol: &'a Protocol, properties: &'a TableProperties) -> Self { + Self { + protocol, + properties, + } + } + + /// Checks if a feature is supported by the table. + /// + /// A feature is supported if: + /// 1. For legacy protocol (reader < 3 or writer < 7): Protocol versions meet minimum requirements + /// 2. For table features protocol (reader >= 3 and writer >= 7): Feature is explicitly listed + /// in the appropriate feature list (reader_features or writer_features) + /// + /// This does NOT check if the feature is enabled via table properties. + pub(crate) fn is_supported(&self, feature: &TableFeature) -> bool { + let Some(info) = feature.info() else { + // Unknown features are not supported + return false; + }; + + // Check if using legacy protocol or table features protocol + let is_legacy_protocol = + self.protocol.min_reader_version() < 3 || self.protocol.min_writer_version() < 7; + + if is_legacy_protocol { + // For legacy protocol, check version requirements + self.protocol.min_reader_version() >= info.min_reader_version + && self.protocol.min_writer_version() >= info.min_writer_version + } else { + // For table features protocol, feature must be explicitly listed + self.is_in_feature_lists(feature) + } + } + + /// Checks if a feature is enabled in the table. + /// + /// A feature is enabled if: + /// 1. It is supported (see is_supported) + /// 2. The enablement check passes (either AlwaysIfSupported or the property check returns true) + pub(crate) fn is_enabled(&self, feature: &TableFeature) -> bool { + if !self.is_supported(feature) { + return false; + } + + let Some(info) = feature.info() else { + return false; + }; + + match info.enablement_check { + EnablementCheck::AlwaysIfSupported => true, + EnablementCheck::EnabledIf(check_fn) => check_fn(self.properties), + } + } + + /// Returns the set of all supported features for this table + pub(crate) fn get_supported_features(&self) -> HashSet { + let mut features = HashSet::new(); + + // Add all reader features if present + if let Some(reader_features) = self.protocol.reader_features() { + for feature in reader_features { + if self.is_supported(feature) { + features.insert(feature.clone()); + } + } + } + + // Add all writer features if present + if let Some(writer_features) = self.protocol.writer_features() { + for feature in writer_features { + if self.is_supported(feature) { + features.insert(feature.clone()); + } + } + } + + features + } + + /// Helper method to check if a feature appears in the protocol's feature lists. + /// This is used for table features protocol (min_reader >= 3 && min_writer >= 7). + pub(crate) fn is_in_feature_lists(&self, feature: &TableFeature) -> bool { + let feature_type = feature.feature_type(); + + match feature_type { + FeatureType::Writer => { + // Writer-only features must appear in writer_features + self.protocol + .writer_features() + .map(|features| features.contains(feature)) + .unwrap_or(false) + } + FeatureType::ReaderWriter => { + // ReaderWriter features must appear in both lists + let in_reader = self + .protocol + .reader_features() + .map(|features| features.contains(feature)) + .unwrap_or(false); + let in_writer = self + .protocol + .writer_features() + .map(|features| features.contains(feature)) + .unwrap_or(false); + + in_reader && in_writer + } + FeatureType::Unknown => false, + } + } + + /// Validates that the Rust kernel has read support for all features required by the table. + /// + /// Returns an error if any ReaderWriter feature lacks read support in the kernel. + /// Writer-only features are not checked since they don't affect reading. + pub(crate) fn validate_read_support(&self) -> DeltaResult<()> { + let supported_features = self.get_supported_features(); + + for feature in supported_features { + if let Some(info) = feature.info() { + // Only check ReaderWriter features for read support + if matches!(info.feature_type, FeatureType::ReaderWriter) && !info.has_read_support + { + return Err(Error::unsupported(format!( + "Table feature '{}' is not supported for reading", + feature + ))); + } + } + } + + Ok(()) + } + + /// Validates that the Rust kernel has write support for all features required by the table. + /// + /// Returns an error if any supported feature (Writer or ReaderWriter) lacks write support in the kernel. + pub(crate) fn validate_write_support(&self) -> DeltaResult<()> { + let supported_features = self.get_supported_features(); + + for feature in supported_features { + if let Some(info) = feature.info() { + // Check both Writer and ReaderWriter features for write support + if matches!( + info.feature_type, + FeatureType::Writer | FeatureType::ReaderWriter + ) && !info.has_write_support + { + return Err(Error::unsupported(format!( + "Table feature '{}' is not supported for writing", + feature + ))); + } + } + } + + Ok(()) + } + + /// Validates that all feature dependencies are satisfied. + /// + /// For each supported feature, checks that all its required features are also supported. + /// Returns an error if any dependency is missing. + pub(crate) fn validate_dependencies(&self) -> DeltaResult<()> { + let supported_features = self.get_supported_features(); + + for feature in &supported_features { + if let Some(info) = feature.info() { + for required_feature in info.required_features { + if !self.is_supported(required_feature) { + return Err(Error::generic(format!( + "Feature '{}' requires '{}' but it is not supported", + feature, required_feature + ))); + } + } + } + } + + Ok(()) + } +} + #[cfg(test)] mod tests { use super::*; #[test] fn test_unknown_features() { - let mixed_reader = &[ - ReaderFeature::DeletionVectors, - ReaderFeature::unknown("cool_feature"), - ReaderFeature::ColumnMapping, - ]; - let mixed_writer = &[ - WriterFeature::DeletionVectors, - WriterFeature::unknown("cool_feature"), - WriterFeature::AppendOnly, + let mixed_features = &[ + TableFeature::DeletionVectors, + TableFeature::unknown("cool_feature"), + TableFeature::ColumnMapping, + TableFeature::AppendOnly, ]; - let reader_string = serde_json::to_string(mixed_reader).unwrap(); - let writer_string = serde_json::to_string(mixed_writer).unwrap(); + let serialized = serde_json::to_string(mixed_features).unwrap(); assert_eq!( - &reader_string, - "[\"deletionVectors\",\"cool_feature\",\"columnMapping\"]" - ); - assert_eq!( - &writer_string, - "[\"deletionVectors\",\"cool_feature\",\"appendOnly\"]" + &serialized, + "[\"deletionVectors\",\"cool_feature\",\"columnMapping\",\"appendOnly\"]" ); - let typed_reader: Vec = serde_json::from_str(&reader_string).unwrap(); - let typed_writer: Vec = serde_json::from_str(&writer_string).unwrap(); + let deserialized: Vec = serde_json::from_str(&serialized).unwrap(); - assert_eq!(typed_reader.len(), 3); - assert_eq!(&typed_reader, mixed_reader); - assert_eq!(typed_writer.len(), 3); - assert_eq!(&typed_writer, mixed_writer); + assert_eq!(deserialized.len(), 4); + assert_eq!(&deserialized, mixed_features); } #[test] - fn test_roundtrip_reader_features() { + fn test_roundtrip_table_features() { let cases = [ - (ReaderFeature::CatalogManaged, "catalogManaged"), - (ReaderFeature::CatalogOwnedPreview, "catalogOwned-preview"), - (ReaderFeature::ColumnMapping, "columnMapping"), - (ReaderFeature::DeletionVectors, "deletionVectors"), - (ReaderFeature::TimestampWithoutTimezone, "timestampNtz"), - (ReaderFeature::TypeWidening, "typeWidening"), - (ReaderFeature::TypeWideningPreview, "typeWidening-preview"), - (ReaderFeature::V2Checkpoint, "v2Checkpoint"), - (ReaderFeature::VacuumProtocolCheck, "vacuumProtocolCheck"), - (ReaderFeature::VariantType, "variantType"), - (ReaderFeature::VariantTypePreview, "variantType-preview"), + // Writer-only features + (TableFeature::AppendOnly, "appendOnly"), + (TableFeature::Invariants, "invariants"), + (TableFeature::CheckConstraints, "checkConstraints"), + (TableFeature::ChangeDataFeed, "changeDataFeed"), + (TableFeature::GeneratedColumns, "generatedColumns"), + (TableFeature::IdentityColumns, "identityColumns"), + (TableFeature::InCommitTimestamp, "inCommitTimestamp"), + (TableFeature::RowTracking, "rowTracking"), + (TableFeature::DomainMetadata, "domainMetadata"), + (TableFeature::IcebergCompatV1, "icebergCompatV1"), + (TableFeature::IcebergCompatV2, "icebergCompatV2"), + (TableFeature::ClusteredTable, "clustering"), + // ReaderWriter features + (TableFeature::CatalogManaged, "catalogManaged"), + (TableFeature::CatalogOwnedPreview, "catalogOwned-preview"), + (TableFeature::ColumnMapping, "columnMapping"), + (TableFeature::DeletionVectors, "deletionVectors"), + (TableFeature::TimestampWithoutTimezone, "timestampNtz"), + (TableFeature::TypeWidening, "typeWidening"), + (TableFeature::TypeWideningPreview, "typeWidening-preview"), + (TableFeature::V2Checkpoint, "v2Checkpoint"), + (TableFeature::VacuumProtocolCheck, "vacuumProtocolCheck"), + (TableFeature::VariantType, "variantType"), + (TableFeature::VariantTypePreview, "variantType-preview"), ( - ReaderFeature::VariantShreddingPreview, + TableFeature::VariantShreddingPreview, "variantShredding-preview", ), - (ReaderFeature::unknown("something"), "something"), + (TableFeature::unknown("something"), "something"), ]; - assert_eq!(ReaderFeature::COUNT, cases.len()); + assert_eq!(TableFeature::COUNT, cases.len()); for (feature, expected) in cases { assert_eq!(feature.to_string(), expected); let serialized = serde_json::to_string(&feature).unwrap(); assert_eq!(serialized, format!("\"{expected}\"")); - let deserialized: ReaderFeature = serde_json::from_str(&serialized).unwrap(); + let deserialized: TableFeature = serde_json::from_str(&serialized).unwrap(); assert_eq!(deserialized, feature); - let from_str: ReaderFeature = expected.parse().unwrap(); + let from_str: TableFeature = expected.parse().unwrap(); assert_eq!(from_str, feature); } } #[test] - fn test_roundtrip_writer_features() { - let cases = [ - (WriterFeature::AppendOnly, "appendOnly"), - (WriterFeature::CatalogManaged, "catalogManaged"), - (WriterFeature::CatalogOwnedPreview, "catalogOwned-preview"), - (WriterFeature::Invariants, "invariants"), - (WriterFeature::CheckConstraints, "checkConstraints"), - (WriterFeature::ChangeDataFeed, "changeDataFeed"), - (WriterFeature::GeneratedColumns, "generatedColumns"), - (WriterFeature::ColumnMapping, "columnMapping"), - (WriterFeature::IdentityColumns, "identityColumns"), - (WriterFeature::InCommitTimestamp, "inCommitTimestamp"), - (WriterFeature::DeletionVectors, "deletionVectors"), - (WriterFeature::RowTracking, "rowTracking"), - (WriterFeature::TimestampWithoutTimezone, "timestampNtz"), - (WriterFeature::TypeWidening, "typeWidening"), - (WriterFeature::TypeWideningPreview, "typeWidening-preview"), - (WriterFeature::DomainMetadata, "domainMetadata"), - (WriterFeature::V2Checkpoint, "v2Checkpoint"), - (WriterFeature::IcebergCompatV1, "icebergCompatV1"), - (WriterFeature::IcebergCompatV2, "icebergCompatV2"), - (WriterFeature::VacuumProtocolCheck, "vacuumProtocolCheck"), - (WriterFeature::ClusteredTable, "clustering"), - (WriterFeature::VariantType, "variantType"), - (WriterFeature::VariantTypePreview, "variantType-preview"), - ( - WriterFeature::VariantShreddingPreview, - "variantShredding-preview", - ), - (WriterFeature::unknown("something"), "something"), - ]; + fn test_feature_info() { + // Test that all known features return Some(FeatureInfo) + assert!(TableFeature::AppendOnly.info().is_some()); + assert!(TableFeature::ColumnMapping.info().is_some()); + assert!(TableFeature::DeletionVectors.info().is_some()); - assert_eq!(WriterFeature::COUNT, cases.len()); + // Test that unknown features return None + assert!(TableFeature::unknown("unknown").info().is_none()); - for (feature, expected) in cases { - assert_eq!(feature.to_string(), expected); - let serialized = serde_json::to_string(&feature).unwrap(); - assert_eq!(serialized, format!("\"{expected}\"")); + // Test specific feature info + let dv_info = TableFeature::DeletionVectors.info().unwrap(); + assert_eq!(dv_info.name, "deletionVectors"); + assert_eq!(dv_info.min_reader_version, 3); + assert_eq!(dv_info.min_writer_version, 7); + assert_eq!(dv_info.feature_type, FeatureType::ReaderWriter); + assert!(dv_info.has_read_support); + assert!(dv_info.has_write_support); - let deserialized: WriterFeature = serde_json::from_str(&serialized).unwrap(); - assert_eq!(deserialized, feature); + // Test feature with dependencies + let row_tracking_info = TableFeature::RowTracking.info().unwrap(); + assert_eq!(row_tracking_info.required_features.len(), 1); + assert_eq!( + row_tracking_info.required_features[0], + TableFeature::DomainMetadata + ); + } - let from_str: WriterFeature = expected.parse().unwrap(); - assert_eq!(from_str, feature); + mod table_features_tests { + use super::*; + use crate::actions::Protocol; + + fn make_legacy_protocol(min_reader: i32, min_writer: i32) -> Protocol { + Protocol::try_new( + min_reader, + min_writer, + None::>, + None::>, + ) + .unwrap() + } + + fn make_table_features_protocol( + reader_features: Vec, + writer_features: Vec, + ) -> Protocol { + Protocol::try_new(3, 7, Some(reader_features), Some(writer_features)).unwrap() + } + + #[test] + fn test_legacy_protocol_support() { + let protocol = make_legacy_protocol(1, 2); + let properties = TableProperties::default(); + let features = TableFeatures::new(&protocol, &properties); + + // AppendOnly requires (1, 2) + assert!(features.is_supported(&TableFeature::AppendOnly)); + + // DeletionVectors requires (3, 7) + assert!(!features.is_supported(&TableFeature::DeletionVectors)); + } + + #[test] + fn test_table_features_protocol_support() { + let protocol = make_table_features_protocol( + vec![TableFeature::DeletionVectors, TableFeature::ColumnMapping], + vec![ + TableFeature::DeletionVectors, + TableFeature::ColumnMapping, + TableFeature::AppendOnly, + ], + ); + let properties = TableProperties::default(); + let features = TableFeatures::new(&protocol, &properties); + + // DeletionVectors is in both lists + assert!(features.is_supported(&TableFeature::DeletionVectors)); + + // AppendOnly is writer-only and in writer_features + assert!(features.is_supported(&TableFeature::AppendOnly)); + + // RowTracking is not in any list + assert!(!features.is_supported(&TableFeature::RowTracking)); + + // ColumnMapping is ReaderWriter and in both lists + assert!(features.is_supported(&TableFeature::ColumnMapping)); + } + + #[test] + fn test_is_enabled_always_if_supported() { + let protocol = make_table_features_protocol( + vec![TableFeature::DeletionVectors], + vec![TableFeature::DeletionVectors], + ); + let mut properties = TableProperties::default(); + let features = TableFeatures::new(&protocol, &properties); + + // DeletionVectors uses EnabledIf check, so needs property set + assert!(!features.is_enabled(&TableFeature::DeletionVectors)); + + // Set the property + properties.enable_deletion_vectors = Some(true); + let features = TableFeatures::new(&protocol, &properties); + assert!(features.is_enabled(&TableFeature::DeletionVectors)); + } + + #[test] + fn test_is_enabled_with_property_check() { + let protocol = make_table_features_protocol(vec![], vec![TableFeature::AppendOnly]); + let mut properties = TableProperties::default(); + let features = TableFeatures::new(&protocol, &properties); + + // AppendOnly is supported but not enabled + assert!(features.is_supported(&TableFeature::AppendOnly)); + assert!(!features.is_enabled(&TableFeature::AppendOnly)); + + // Enable it via property + properties.append_only = Some(true); + let features = TableFeatures::new(&protocol, &properties); + assert!(features.is_enabled(&TableFeature::AppendOnly)); + } + + #[test] + fn test_get_supported_features() { + let protocol = make_table_features_protocol( + vec![TableFeature::ColumnMapping], + vec![ + TableFeature::ColumnMapping, + TableFeature::AppendOnly, + TableFeature::DomainMetadata, + ], + ); + let properties = TableProperties::default(); + let features = TableFeatures::new(&protocol, &properties); + + let supported = features.get_supported_features(); + assert_eq!(supported.len(), 3); + assert!(supported.contains(&TableFeature::ColumnMapping)); + assert!(supported.contains(&TableFeature::AppendOnly)); + assert!(supported.contains(&TableFeature::DomainMetadata)); + } + + #[test] + fn test_validate_read_support() { + let protocol = make_table_features_protocol( + vec![TableFeature::DeletionVectors], + vec![TableFeature::DeletionVectors], + ); + let properties = TableProperties::default(); + let features = TableFeatures::new(&protocol, &properties); + + // DeletionVectors has read support + assert!(features.validate_read_support().is_ok()); + + // CheckConstraints doesn't have read support + let protocol = + make_table_features_protocol(vec![], vec![TableFeature::CheckConstraints]); + let features = TableFeatures::new(&protocol, &properties); + assert!(features.validate_read_support().is_ok()); + } + + #[test] + fn test_validate_write_support() { + let protocol = make_table_features_protocol(vec![], vec![TableFeature::AppendOnly]); + let properties = TableProperties::default(); + let features = TableFeatures::new(&protocol, &properties); + + // AppendOnly has write support + assert!(features.validate_write_support().is_ok()); + + // CheckConstraints doesn't have write support + let protocol = + make_table_features_protocol(vec![], vec![TableFeature::CheckConstraints]); + let features = TableFeatures::new(&protocol, &properties); + assert!(features.validate_write_support().is_err()); + } + + #[test] + fn test_validate_dependencies() { + // RowTracking requires DomainMetadata + let protocol = make_table_features_protocol( + vec![], + vec![TableFeature::RowTracking, TableFeature::DomainMetadata], + ); + let properties = TableProperties::default(); + let features = TableFeatures::new(&protocol, &properties); + + // Should pass because DomainMetadata is present + assert!(features.validate_dependencies().is_ok()); + + // Missing DomainMetadata should fail + let protocol = make_table_features_protocol(vec![], vec![TableFeature::RowTracking]); + let features = TableFeatures::new(&protocol, &properties); + assert!(features.validate_dependencies().is_err()); + } + + #[test] + fn test_is_in_feature_lists_writer_only() { + let protocol = make_table_features_protocol(vec![], vec![TableFeature::AppendOnly]); + let properties = TableProperties::default(); + let features = TableFeatures::new(&protocol, &properties); + + assert!(features.is_in_feature_lists(&TableFeature::AppendOnly)); + assert!(!features.is_in_feature_lists(&TableFeature::RowTracking)); + } + + #[test] + fn test_is_in_feature_lists_reader_writer() { + // ReaderWriter feature must be in both lists + let protocol = make_table_features_protocol( + vec![TableFeature::DeletionVectors], + vec![TableFeature::DeletionVectors], + ); + let properties = TableProperties::default(); + let features = TableFeatures::new(&protocol, &properties); + + assert!(features.is_in_feature_lists(&TableFeature::DeletionVectors)); + + // Test that a feature not in any list returns false + assert!(!features.is_in_feature_lists(&TableFeature::ColumnMapping)); + + // Test writer-only feature in writer list + let protocol = make_table_features_protocol(vec![], vec![TableFeature::AppendOnly]); + let features = TableFeatures::new(&protocol, &properties); + assert!(features.is_in_feature_lists(&TableFeature::AppendOnly)); + } + + #[test] + fn test_unknown_feature_not_supported() { + let protocol = make_table_features_protocol( + vec![TableFeature::unknown("cool_feature")], + vec![TableFeature::unknown("cool_feature")], + ); + let properties = TableProperties::default(); + let features = TableFeatures::new(&protocol, &properties); + + // Unknown features are never supported + assert!(!features.is_supported(&TableFeature::unknown("cool_feature"))); + assert!(!features.is_enabled(&TableFeature::unknown("cool_feature"))); } } } diff --git a/kernel/src/table_features/timestamp_ntz.rs b/kernel/src/table_features/timestamp_ntz.rs index 32c1488ed..54eb5e800 100644 --- a/kernel/src/table_features/timestamp_ntz.rs +++ b/kernel/src/table_features/timestamp_ntz.rs @@ -1,6 +1,6 @@ //! Validation for TIMESTAMP_NTZ feature support -use super::{ReaderFeature, WriterFeature}; +use super::TableFeature; use crate::actions::Protocol; use crate::schema::{PrimitiveType, Schema, SchemaTransform}; use crate::utils::require; @@ -14,9 +14,7 @@ pub(crate) fn validate_timestamp_ntz_feature_support( schema: &Schema, protocol: &Protocol, ) -> DeltaResult<()> { - if !protocol.has_reader_feature(&ReaderFeature::TimestampWithoutTimezone) - || !protocol.has_writer_feature(&WriterFeature::TimestampWithoutTimezone) - { + if !protocol.has_table_feature(&TableFeature::TimestampWithoutTimezone) { let mut uses_timestamp_ntz = UsesTimestampNtz(false); let _ = uses_timestamp_ntz.transform_struct(schema); require!( @@ -46,7 +44,7 @@ mod tests { use super::*; use crate::actions::Protocol; use crate::schema::{DataType, PrimitiveType, StructField, StructType}; - use crate::table_features::{ReaderFeature, WriterFeature}; + use crate::table_features::TableFeature; use crate::utils::test_utils::assert_result_error_with_message; #[test] @@ -65,8 +63,8 @@ mod tests { let protocol_with_features = Protocol::try_new( 3, 7, - Some([ReaderFeature::TimestampWithoutTimezone]), - Some([WriterFeature::TimestampWithoutTimezone]), + Some([TableFeature::TimestampWithoutTimezone]), + Some([TableFeature::TimestampWithoutTimezone]), ) .unwrap(); diff --git a/kernel/tests/write.rs b/kernel/tests/write.rs index a8b228cd4..7f7b3d05e 100644 --- a/kernel/tests/write.rs +++ b/kernel/tests/write.rs @@ -907,9 +907,7 @@ async fn test_append_variant() -> Result<(), Box> { // We can add shredding features as well as we are allowed to write unshredded variants // into shredded tables and shredded reads are explicitly blocked in the default // engine's parquet reader. - // TODO: (#1124) we don't actually support column mapping writes yet, but have some - // tests that do column mapping on writes. For now omit the writer feature to let tests - // run, but after actual support this should be enabled. + // columnMapping is a ReaderWriter feature and must be in both reader and writer features let table_url = create_table( store.clone(), table_location, @@ -917,7 +915,7 @@ async fn test_append_variant() -> Result<(), Box> { &[], true, vec!["variantType", "variantShredding-preview", "columnMapping"], - vec!["variantType", "variantShredding-preview"], + vec!["variantType", "variantShredding-preview", "columnMapping"], ) .await?;