-
Couldn't load subscription status.
- Fork 118
refactor: Separate domain metadata additions and removals #1421
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
DrakeLin
wants to merge
3
commits into
delta-io:main
Choose a base branch
from
DrakeLin:drake-lin_data/fix-domain-metadata
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from 2 commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Some comments aren't visible on the classic Files Changed page.
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,6 +8,7 @@ use url::Url; | |
| use crate::actions::{ | ||
| as_log_add_schema, domain_metadata::scan_domain_metadatas, get_log_commit_info_schema, | ||
| get_log_domain_metadata_schema, get_log_txn_schema, CommitInfo, DomainMetadata, SetTransaction, | ||
| INTERNAL_DOMAIN_PREFIX, | ||
| }; | ||
| #[cfg(feature = "catalog-managed")] | ||
| use crate::committer::FileSystemCommitter; | ||
|
|
@@ -134,7 +135,11 @@ pub struct Transaction { | |
| // commit-wide timestamp (in milliseconds since epoch) - used in ICT, `txn` action, etc. to | ||
| // keep all timestamps within the same commit consistent. | ||
| commit_timestamp: i64, | ||
| domain_metadatas: Vec<DomainMetadata>, | ||
| // Domain metadata additions for this transaction. | ||
| domain_metadatas_to_add: Vec<DomainMetadata>, | ||
| // Domain names to remove in this transaction. The configuration values are fetched during | ||
| // commit from the log to preserve the pre-image in tombstones. | ||
| domains_to_remove: Vec<String>, | ||
|
||
| // Whether this transaction contains any logical data changes. | ||
| data_change: bool, | ||
| } | ||
|
|
@@ -177,7 +182,8 @@ impl Transaction { | |
| add_files_metadata: vec![], | ||
| set_transactions: vec![], | ||
| commit_timestamp, | ||
| domain_metadatas: vec![], | ||
| domain_metadatas_to_add: vec![], | ||
| domains_to_remove: vec![], | ||
| data_change: true, | ||
| }) | ||
| } | ||
|
|
@@ -347,8 +353,8 @@ impl Transaction { | |
| /// fail (that is, we don't eagerly check domain validity here). | ||
| /// Setting metadata for multiple distinct domains is allowed. | ||
| pub fn with_domain_metadata(mut self, domain: String, configuration: String) -> Self { | ||
| self.domain_metadatas | ||
| .push(DomainMetadata::new(domain, configuration)); | ||
| self.domain_metadatas_to_add | ||
| .push(DomainMetadata::new(domain, configuration, false)); | ||
| self | ||
| } | ||
|
|
||
|
|
@@ -362,9 +368,7 @@ impl Transaction { | |
| /// fail (that is, we don't eagerly check domain validity here). | ||
| /// Removing metadata for multiple distinct domains is allowed. | ||
| pub fn with_domain_metadata_removed(mut self, domain: String) -> Self { | ||
| // actual configuration value determined during commit | ||
| self.domain_metadatas | ||
| .push(DomainMetadata::remove(domain, String::new())); | ||
| self.domains_to_remove.push(domain); | ||
| self | ||
| } | ||
|
|
||
|
|
@@ -378,8 +382,8 @@ impl Transaction { | |
| engine: &'a dyn Engine, | ||
| row_tracking_high_watermark: Option<RowTrackingDomainMetadata>, | ||
| ) -> DeltaResult<impl Iterator<Item = DeltaResult<Box<dyn EngineData>>> + 'a> { | ||
| // if there are domain metadata actions, the table must support it | ||
| if !self.domain_metadatas.is_empty() | ||
| // if there are domain metadata actions (additions or removals), the table must support it | ||
| if (!self.domain_metadatas_to_add.is_empty() || !self.domains_to_remove.is_empty()) | ||
| && !self | ||
| .read_snapshot | ||
| .table_configuration() | ||
|
|
@@ -390,58 +394,57 @@ impl Transaction { | |
| )); | ||
| } | ||
|
|
||
| // validate user domain metadata and check if we have removals | ||
| // validate all domain metadata operations (additions and removals) | ||
| let mut seen_domains = HashSet::new(); | ||
| let mut has_removals = false; | ||
| for dm in &self.domain_metadatas { | ||
| if dm.is_internal() { | ||
|
|
||
| // chain both additions and removals into a single iterator of domain names | ||
DrakeLin marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| let all_domains = self | ||
| .domain_metadatas_to_add | ||
| .iter() | ||
| .map(|dm| dm.domain()) | ||
| .chain(self.domains_to_remove.iter().map(String::as_str)); | ||
|
|
||
| for domain in all_domains { | ||
| if domain.starts_with(INTERNAL_DOMAIN_PREFIX) { | ||
| return Err(Error::Generic( | ||
| "Cannot modify domains that start with 'delta.' as those are system controlled" | ||
| .to_string(), | ||
| )); | ||
| } | ||
|
|
||
| if !seen_domains.insert(dm.domain()) { | ||
| if !seen_domains.insert(domain) { | ||
| return Err(Error::Generic(format!( | ||
| "Metadata for domain {} already specified in this transaction", | ||
| dm.domain() | ||
| domain | ||
| ))); | ||
| } | ||
|
|
||
| if dm.is_removed() { | ||
| has_removals = true; | ||
| } | ||
| } | ||
|
|
||
| // fetch previous configuration values (requires log replay) | ||
| let existing_domains = if has_removals { | ||
| // fetch previous configuration values if we have removals (requires log replay) | ||
| let existing_domains = if !self.domains_to_remove.is_empty() { | ||
| scan_domain_metadatas(self.read_snapshot.log_segment(), None, engine)? | ||
| } else { | ||
| HashMap::new() | ||
| }; | ||
|
|
||
| let user_domains = self | ||
| .domain_metadatas | ||
| .iter() | ||
| .filter_map(move |dm: &DomainMetadata| { | ||
| if dm.is_removed() { | ||
| existing_domains.get(dm.domain()).map(|existing| { | ||
| DomainMetadata::remove( | ||
| dm.domain().to_string(), | ||
| existing.configuration().to_string(), | ||
| ) | ||
| }) | ||
| } else { | ||
| Some(dm.clone()) | ||
| } | ||
| }); | ||
| // process domain additions - clone directly since domain_metadatas_to_add only contains additions | ||
| let domain_additions = self.domain_metadatas_to_add.iter().cloned(); | ||
|
|
||
| // process domain removals - fetch configuration from existing domains and create tombstones | ||
| let domain_removals = self.domains_to_remove.iter().filter_map(move |domain| { | ||
| // if domain doesn't exist in the log, this is a no-op (filter it out) | ||
| existing_domains.get(domain).map(|existing| { | ||
| DomainMetadata::new(domain.clone(), existing.configuration().to_owned(), true) | ||
| }) | ||
| }); | ||
|
|
||
| let system_domains = row_tracking_high_watermark | ||
| .map(DomainMetadata::try_from) | ||
| .transpose()? | ||
| .into_iter(); | ||
|
|
||
| Ok(user_domains | ||
| Ok(domain_additions | ||
| .chain(domain_removals) | ||
| .chain(system_domains) | ||
| .map(|dm| dm.into_engine_data(get_log_domain_metadata_schema().clone(), engine))) | ||
| } | ||
|
|
||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.