From c8e33f75d44bc3b46fa24cbad0a6b6e7014f698e Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Wed, 13 Nov 2024 18:16:34 +0800 Subject: [PATCH 01/22] refactor: unify Plan and PhysicalPlan of copy into and insert --- .../interpreters/access/privilege_access.rs | 37 +- .../interpreter_copy_into_table.rs | 360 +++++++-------- .../src/interpreters/interpreter_explain.rs | 2 +- .../src/interpreters/interpreter_factory.rs | 11 +- .../src/interpreters/interpreter_replace.rs | 36 +- src/query/service/src/sessions/queue_mgr.rs | 5 +- .../physical_copy_into_table.rs | 1 - src/query/sql/src/planner/binder/binder.rs | 3 +- .../sql/src/planner/binder/copy_into_table.rs | 435 +++++++++--------- src/query/sql/src/planner/binder/insert.rs | 181 +++++--- .../sql/src/planner/format/display_plan.rs | 4 +- .../sql/src/planner/optimizer/optimizer.rs | 62 +-- .../sql/src/planner/plans/copy_into_table.rs | 320 +++++++------ src/query/sql/src/planner/plans/insert.rs | 97 ++-- src/query/sql/src/planner/plans/plan.rs | 47 +- 15 files changed, 818 insertions(+), 783 deletions(-) diff --git a/src/query/service/src/interpreters/access/privilege_access.rs b/src/query/service/src/interpreters/access/privilege_access.rs index 0eaa433bc235e..88f70769b48d1 100644 --- a/src/query/service/src/interpreters/access/privilege_access.rs +++ b/src/query/service/src/interpreters/access/privilege_access.rs @@ -1026,17 +1026,17 @@ impl AccessChecker for PrivilegeAccess { .await?; } // Others. - Plan::Insert(plan) => { - let target_table_privileges = if plan.overwrite { - vec![UserPrivilegeType::Insert, UserPrivilegeType::Delete] - } else { - vec![UserPrivilegeType::Insert] - }; - for privilege in target_table_privileges { - self.validate_table_access(&plan.catalog, &plan.database, &plan.table, privilege, false, false).await?; - } - self.validate_insert_source(ctx, &plan.source).await?; - } + // Plan::Insert(plan) => { + // let target_table_privileges = if plan.overwrite { + // vec![UserPrivilegeType::Insert, UserPrivilegeType::Delete] + // } else { + // vec![UserPrivilegeType::Insert] + // }; + // for privilege in target_table_privileges { + // self.validate_table_access(&plan.catalog, &plan.database, &plan.table, privilege, false, false).await?; + // } + // self.validate_insert_source(ctx, &plan.source).await?; + // } Plan::InsertMultiTable(plan) => { let target_table_privileges = if plan.overwrite { vec![UserPrivilegeType::Insert, UserPrivilegeType::Delete] @@ -1187,12 +1187,15 @@ impl AccessChecker for PrivilegeAccess { self.validate_access(&GrantObject::Global, UserPrivilegeType::Alter, false, false) .await?; } - Plan::CopyIntoTable(plan) => { - self.validate_stage_access(&plan.stage_table_info.stage_info, UserPrivilegeType::Read).await?; - self.validate_table_access(plan.catalog_info.catalog_name(), &plan.database_name, &plan.table_name, UserPrivilegeType::Insert, false, false).await?; - if let Some(query) = &plan.query { - self.check(ctx, query).await?; - } + Plan::CopyIntoTable { .. 
} => { + // match &plan.source{ + + // } + // self.validate_stage_access(&plan.stage_table_info.stage_info, UserPrivilegeType::Read).await?; + // self.validate_table_access(&plan.catalog_name, &plan.database_name, &plan.table_name, UserPrivilegeType::Insert, false, false).await?; + // if let Some(query) = &plan.query { + // self.check(ctx, query).await?; + // } } Plan::CopyIntoLocation(plan) => { self.validate_stage_access(&plan.stage, UserPrivilegeType::Write).await?; diff --git a/src/query/service/src/interpreters/interpreter_copy_into_table.rs b/src/query/service/src/interpreters/interpreter_copy_into_table.rs index e03d5500c87ce..1da643d5aa2f5 100644 --- a/src/query/service/src/interpreters/interpreter_copy_into_table.rs +++ b/src/query/service/src/interpreters/interpreter_copy_into_table.rs @@ -16,6 +16,7 @@ use std::collections::BTreeMap; use std::sync::Arc; use databend_common_catalog::lock::LockTableOption; +use databend_common_catalog::plan::StageTableInfo; use databend_common_exception::Result; use databend_common_expression::types::Int32Type; use databend_common_expression::types::StringType; @@ -32,6 +33,7 @@ use databend_common_sql::executor::physical_plans::MutationKind; use databend_common_sql::executor::physical_plans::TableScan; use databend_common_sql::executor::table_read_plan::ToReadDataSourcePlan; use databend_common_sql::executor::PhysicalPlan; +use databend_common_sql::plans::AppendSource; use databend_common_storage::StageFileInfo; use databend_common_storages_stage::StageTable; use log::debug; @@ -97,95 +99,89 @@ impl CopyIntoTableInterpreter { &self, plan: &CopyIntoTablePlan, ) -> Result<(PhysicalPlan, Vec)> { - let to_table = self - .ctx - .get_table( - plan.catalog_info.catalog_name(), - &plan.database_name, - &plan.table_name, - ) - .await?; - let mut update_stream_meta_reqs = vec![]; - let (source, project_columns) = if let Some(ref query) = plan.query { - let query = if plan.enable_distributed { - query.remove_exchange_for_select() - } else { - *query.clone() - }; - - let (query_interpreter, update_stream_meta) = self.build_query(&query).await?; - update_stream_meta_reqs = update_stream_meta; - let query_physical_plan = Box::new(query_interpreter.build_physical_plan().await?); - - let result_columns = query_interpreter.get_result_columns(); - ( - CopyIntoTableSource::Query(query_physical_plan), - Some(result_columns), - ) - } else { - let stage_table = StageTable::try_create(plan.stage_table_info.clone())?; - - let data_source_plan = stage_table - .read_plan(self.ctx.clone(), None, None, false, false) - .await?; - - let mut name_mapping = BTreeMap::new(); - for (idx, field) in data_source_plan.schema().fields.iter().enumerate() { - name_mapping.insert(field.name.clone(), idx); - } - - ( - CopyIntoTableSource::Stage(Box::new(PhysicalPlan::TableScan(TableScan { - plan_id: 0, - name_mapping, - stat_info: None, - table_index: None, - internal_column: None, - source: Box::new(data_source_plan), - }))), - None, - ) - }; - - let mut root = PhysicalPlan::CopyIntoTable(Box::new(CopyIntoTable { - plan_id: 0, - required_values_schema: plan.required_values_schema.clone(), - values_consts: plan.values_consts.clone(), - required_source_schema: plan.required_source_schema.clone(), - stage_table_info: plan.stage_table_info.clone(), - table_info: to_table.get_table_info().clone(), - force: plan.force, - write_mode: plan.write_mode, - validation_mode: plan.validation_mode.clone(), - project_columns, - source, - is_transform: plan.is_transform, - })); - - if 
plan.enable_distributed { - root = PhysicalPlan::Exchange(Exchange { - plan_id: 0, - input: Box::new(root), - kind: FragmentKind::Merge, - keys: Vec::new(), - allow_adjust_parallelism: true, - ignore_exchange: false, - }); - } - - let mut next_plan_id = 0; - root.adjust_plan_id(&mut next_plan_id); - - Ok((root, update_stream_meta_reqs)) + // let to_table = self + // .ctx + // .get_table(&plan.catalog_name, &plan.database_name, &plan.table_name) + // .await?; + // let mut update_stream_meta_reqs = vec![]; + // let (source, project_columns) = if let Some(ref query) = plan.query { + // let query = if plan.enable_distributed { + // query.remove_exchange_for_select() + // } else { + // *query.clone() + // }; + + // let (query_interpreter, update_stream_meta) = self.build_query(&query).await?; + // update_stream_meta_reqs = update_stream_meta; + // let query_physical_plan = Box::new(query_interpreter.build_physical_plan().await?); + + // let result_columns = query_interpreter.get_result_columns(); + // ( + // CopyIntoTableSource::Query(query_physical_plan), + // Some(result_columns), + // ) + // } else { + // let stage_table = StageTable::try_create(plan.stage_table_info.clone())?; + + // let data_source_plan = stage_table + // .read_plan(self.ctx.clone(), None, None, false, false) + // .await?; + + // let mut name_mapping = BTreeMap::new(); + // for (idx, field) in data_source_plan.schema().fields.iter().enumerate() { + // name_mapping.insert(field.name.clone(), idx); + // } + + // ( + // CopyIntoTableSource::Stage(Box::new(PhysicalPlan::TableScan(TableScan { + // plan_id: 0, + // name_mapping, + // stat_info: None, + // table_index: None, + // internal_column: None, + // source: Box::new(data_source_plan), + // }))), + // None, + // ) + // }; + + // let mut root = PhysicalPlan::CopyIntoTable(Box::new(CopyIntoTable { + // plan_id: 0, + // required_values_schema: plan.required_values_schema.clone(), + // values_consts: plan.values_consts.clone(), + // required_source_schema: plan.required_source_schema.clone(), + // stage_table_info: plan.stage_table_info.clone(), + // table_info: to_table.get_table_info().clone(), + // write_mode: plan.write_mode, + // validation_mode: plan.validation_mode.clone(), + // project_columns, + // source, + // is_transform: plan.is_transform, + // })); + + // if plan.enable_distributed { + // root = PhysicalPlan::Exchange(Exchange { + // plan_id: 0, + // input: Box::new(root), + // kind: FragmentKind::Merge, + // keys: Vec::new(), + // allow_adjust_parallelism: true, + // ignore_exchange: false, + // }); + // } + + // let mut next_plan_id = 0; + // root.adjust_plan_id(&mut next_plan_id); + + // Ok((root, update_stream_meta_reqs)) + todo!() } - fn get_copy_into_table_result(&self) -> Result> { - let return_all = !self - .plan - .stage_table_info - .stage_info - .copy_options - .return_failed_only; + fn get_copy_into_table_result( + &self, + stage_table_info: &StageTableInfo, + ) -> Result> { + let return_all = !stage_table_info.stage_info.copy_options.return_failed_only; let cs = self.ctx.get_copy_status(); let mut results = cs.files.iter().collect::>(); @@ -236,56 +232,52 @@ impl CopyIntoTableInterpreter { ) -> Result<()> { let ctx = self.ctx.clone(); let to_table = ctx - .get_table( - plan.catalog_info.catalog_name(), - &plan.database_name, - &plan.table_name, - ) + .get_table(&plan.catalog_name, &plan.database_name, &plan.table_name) .await?; // Commit. 
- { - let copied_files_meta_req = PipelineBuilder::build_upsert_copied_files_to_meta_req( - ctx.clone(), - to_table.as_ref(), - &plan.stage_table_info.stage_info, - &files_to_copy, - plan.force, - )?; - - to_table.commit_insertion( - ctx.clone(), - main_pipeline, - copied_files_meta_req, - update_stream_meta, - plan.write_mode.is_overwrite(), - None, - deduplicated_label, - )?; - } + // { + // let copied_files_meta_req = PipelineBuilder::build_upsert_copied_files_to_meta_req( + // ctx.clone(), + // to_table.as_ref(), + // &plan.stage_table_info.stage_info, + // &files_to_copy, + // plan.force, + // )?; + + // to_table.commit_insertion( + // ctx.clone(), + // main_pipeline, + // copied_files_meta_req, + // update_stream_meta, + // plan.write_mode.is_overwrite(), + // None, + // deduplicated_label, + // )?; + // } // Purge files. - { - info!( - "set files to be purged, # of copied files: {}, # of duplicated files: {}", - files_to_copy.len(), - duplicated_files_detected.len() - ); - - let files_to_be_deleted = files_to_copy - .into_iter() - .map(|v| v.path) - .chain(duplicated_files_detected) - .collect::>(); - // set on_finished callback. - PipelineBuilder::set_purge_files_on_finished( - ctx.clone(), - files_to_be_deleted, - plan.stage_table_info.stage_info.copy_options.purge, - plan.stage_table_info.stage_info.clone(), - main_pipeline, - )?; - } + // { + // info!( + // "set files to be purged, # of copied files: {}, # of duplicated files: {}", + // files_to_copy.len(), + // duplicated_files_detected.len() + // ); + + // let files_to_be_deleted = files_to_copy + // .into_iter() + // .map(|v| v.path) + // .chain(duplicated_files_detected) + // .collect::>(); + // // set on_finished callback. + // PipelineBuilder::set_purge_files_on_finished( + // ctx.clone(), + // files_to_be_deleted, + // plan.stage_table_info.stage_info.copy_options.purge, + // plan.stage_table_info.stage_info.clone(), + // main_pipeline, + // )?; + // } Ok(()) } @@ -299,29 +291,29 @@ impl CopyIntoTableInterpreter { // unfortunately, hooking the on_finished callback of a "blank" pipeline, // e.g. `PipelineBuildResult::create` leads to runtime error (during pipeline execution). - if self.plan.stage_table_info.stage_info.copy_options.purge - && !self - .plan - .stage_table_info - .duplicated_files_detected - .is_empty() - && self - .ctx - .get_settings() - .get_enable_purge_duplicated_files_in_copy()? - { - info!( - "purge_duplicated_files_in_copy enabled, number of duplicated files: {}", - self.plan.stage_table_info.duplicated_files_detected.len() - ); - - PipelineBuilder::purge_files_immediately( - self.ctx.clone(), - self.plan.stage_table_info.duplicated_files_detected.clone(), - self.plan.stage_table_info.stage_info.clone(), - ) - .await?; - } + // if self.plan.stage_table_info.stage_info.copy_options.purge + // && !self + // .plan + // .stage_table_info + // .duplicated_files_detected + // .is_empty() + // && self + // .ctx + // .get_settings() + // .get_enable_purge_duplicated_files_in_copy()? 
+ // { + // info!( + // "purge_duplicated_files_in_copy enabled, number of duplicated files: {}", + // self.plan.stage_table_info.duplicated_files_detected.len() + // ); + + // PipelineBuilder::purge_files_immediately( + // self.ctx.clone(), + // self.plan.stage_table_info.duplicated_files_detected.clone(), + // self.plan.stage_table_info.stage_info.clone(), + // ) + // .await?; + // } Ok(PipelineBuildResult::create()) } } @@ -345,43 +337,37 @@ impl Interpreter for CopyIntoTableInterpreter { return Ok(PipelineBuildResult::create()); } - if self.plan.no_file_to_copy { - info!("no file to copy"); - return self.on_no_files_to_copy().await; - } + // if self.plan.no_file_to_copy { + // info!("no file to copy"); + // return self.on_no_files_to_copy().await; + // } let (physical_plan, update_stream_meta) = self.build_physical_plan(&self.plan).await?; let mut build_res = build_query_pipeline_without_render_result_set(&self.ctx, &physical_plan).await?; // Build commit insertion pipeline. - { - let files_to_copy = self - .plan - .stage_table_info - .files_to_copy - .clone() - .unwrap_or_default(); - - let duplicated_files_detected = - self.plan.stage_table_info.duplicated_files_detected.clone(); - - self.commit_insertion( - &mut build_res.main_pipeline, - &self.plan, - files_to_copy, - duplicated_files_detected, - update_stream_meta, - unsafe { self.ctx.get_settings().get_deduplicate_label()? }, - ) - .await?; - } + // { + // let files_to_copy = self.plan.source.files_to_copy(); + + // let duplicated_files_detected = self.plan.source.duplicated_files_detected(); + + // self.commit_insertion( + // &mut build_res.main_pipeline, + // &self.plan, + // files_to_copy, + // duplicated_files_detected, + // update_stream_meta, + // unsafe { self.ctx.get_settings().get_deduplicate_label()? }, + // ) + // .await?; + // } // Execute hook. { let hook_operator = HookOperator::create( self.ctx.clone(), - self.plan.catalog_info.catalog_name().to_string(), + self.plan.catalog_name.to_string(), self.plan.database_name.to_string(), self.plan.table_name.to_string(), MutationKind::Insert, @@ -394,12 +380,12 @@ impl Interpreter for CopyIntoTableInterpreter { } fn inject_result(&self) -> Result { - let blocks = if self.plan.no_file_to_copy { - vec![DataBlock::empty_with_schema(self.plan.schema())] - } else { - self.get_copy_into_table_result()? - }; + // let blocks = match &self.plan.source { + // AppendSource::Stage(stage) => self.get_copy_into_table_result(stage)?, + // _ => vec![DataBlock::empty_with_schema(self.plan.schema())], + // }; + todo!() - Ok(Box::pin(DataBlockStream::create(None, blocks))) + // Ok(Box::pin(DataBlockStream::create(None, blocks))) } } diff --git a/src/query/service/src/interpreters/interpreter_explain.rs b/src/query/service/src/interpreters/interpreter_explain.rs index 9191a3a103732..cff227967122b 100644 --- a/src/query/service/src/interpreters/interpreter_explain.rs +++ b/src/query/service/src/interpreters/interpreter_explain.rs @@ -105,7 +105,7 @@ impl Interpreter for ExplainInterpreter { self.explain_query(s_expr, metadata, bind_context, formatted_ast) .await? 
} - Plan::Insert(insert_plan) => insert_plan.explain(self.config.verbose).await?, + // Plan::Insert(insert_plan) => insert_plan.explain(self.config.verbose).await?, Plan::Replace(replace_plan) => replace_plan.explain(self.config.verbose).await?, Plan::CreateTable(plan) => match &plan.as_select { Some(box Plan::Query { diff --git a/src/query/service/src/interpreters/interpreter_factory.rs b/src/query/service/src/interpreters/interpreter_factory.rs index 773f82788c82a..0a90eb381e709 100644 --- a/src/query/service/src/interpreters/interpreter_factory.rs +++ b/src/query/service/src/interpreters/interpreter_factory.rs @@ -156,10 +156,11 @@ impl InterpreterFactory { *graphical, )?)), - Plan::CopyIntoTable(copy_plan) => Ok(Arc::new(CopyIntoTableInterpreter::try_create( - ctx, - *copy_plan.clone(), - )?)), + Plan::CopyIntoTable { .. } => todo!(), + // Ok(Arc::new(CopyIntoTableInterpreter::try_create( + // ctx, + // *copy_plan.clone(), + // )?)), Plan::CopyIntoLocation(copy_plan) => Ok(Arc::new( CopyIntoLocationInterpreter::try_create(ctx, copy_plan.clone())?, )), @@ -378,8 +379,6 @@ impl InterpreterFactory { *desc_user.clone(), )?)), - Plan::Insert(insert) => InsertInterpreter::try_create(ctx, *insert.clone()), - Plan::Replace(replace) => ReplaceInterpreter::try_create(ctx, *replace.clone()), Plan::DataMutation { s_expr, schema, .. } => Ok(Arc::new( MutationInterpreter::try_create(ctx, *s_expr.clone(), schema.clone())?, diff --git a/src/query/service/src/interpreters/interpreter_replace.rs b/src/query/service/src/interpreters/interpreter_replace.rs index b85dc319bab53..a182e3f5b1230 100644 --- a/src/query/service/src/interpreters/interpreter_replace.rs +++ b/src/query/service/src/interpreters/interpreter_replace.rs @@ -391,24 +391,24 @@ impl ReplaceInterpreter { self.connect_query_plan_source(ctx.clone(), plan).await } InsertInputSource::Stage(plan) => match *plan.clone() { - Plan::CopyIntoTable(copy_plan) => { - let interpreter = - CopyIntoTableInterpreter::try_create(ctx.clone(), *copy_plan.clone())?; - let (physical_plan, _) = interpreter.build_physical_plan(©_plan).await?; - - // TODO optimization: if copy_plan.stage_table_info.files_to_copy is None, there should be a short-cut plan - - *purge_info = Some(( - copy_plan.stage_table_info.files_to_copy.unwrap_or_default(), - copy_plan.stage_table_info.stage_info.clone(), - )); - Ok(ReplaceSourceCtx { - root: Box::new(physical_plan), - select_ctx: None, - update_stream_meta: vec![], - bind_context: None, - }) - } + // Plan::CopyIntoTable(copy_plan) => { + // let interpreter = + // CopyIntoTableInterpreter::try_create(ctx.clone(), *copy_plan.clone())?; + // let (physical_plan, _) = interpreter.build_physical_plan(©_plan).await?; + + // // TODO optimization: if copy_plan.stage_table_info.files_to_copy is None, there should be a short-cut plan + + // *purge_info = Some(( + // copy_plan.stage_table_info.files_to_copy.unwrap_or_default(), + // copy_plan.stage_table_info.stage_info.clone(), + // )); + // Ok(ReplaceSourceCtx { + // root: Box::new(physical_plan), + // select_ctx: None, + // update_stream_meta: vec![], + // bind_context: None, + // }) + // } _ => unreachable!("plan in InsertInputSource::Stag must be CopyIntoTable"), }, } diff --git a/src/query/service/src/sessions/queue_mgr.rs b/src/query/service/src/sessions/queue_mgr.rs index 231e05aff5b3b..270ea613db3d5 100644 --- a/src/query/service/src/sessions/queue_mgr.rs +++ b/src/query/service/src/sessions/queue_mgr.rs @@ -395,11 +395,10 @@ impl QueryEntry { } // Write: Heavy actions. 
- Plan::Insert(_) - | Plan::InsertMultiTable(_) + Plan::InsertMultiTable(_) | Plan::Replace(_) | Plan::DataMutation { .. } - | Plan::CopyIntoTable(_) + | Plan::CopyIntoTable { .. } | Plan::CopyIntoLocation(_) => { return true; } diff --git a/src/query/sql/src/executor/physical_plans/physical_copy_into_table.rs b/src/query/sql/src/executor/physical_plans/physical_copy_into_table.rs index eb8ebb1e4e4e0..1fd92f5c3ce57 100644 --- a/src/query/sql/src/executor/physical_plans/physical_copy_into_table.rs +++ b/src/query/sql/src/executor/physical_plans/physical_copy_into_table.rs @@ -34,7 +34,6 @@ pub struct CopyIntoTable { pub required_source_schema: DataSchemaRef, pub write_mode: CopyIntoTableMode, pub validation_mode: ValidationMode, - pub force: bool, pub stage_table_info: StageTableInfo, pub table_info: TableInfo, diff --git a/src/query/sql/src/planner/binder/binder.rs b/src/query/sql/src/planner/binder/binder.rs index 0646164cbf2fd..f893894cd03f5 100644 --- a/src/query/sql/src/planner/binder/binder.rs +++ b/src/query/sql/src/planner/binder/binder.rs @@ -401,7 +401,8 @@ impl<'a> Binder { warn!("In INSERT resolve optimize hints {:?} failed, err: {:?}", hints, e); } } - self.bind_insert(bind_context, stmt).await? + // self.bind_insert(bind_context, stmt).await? + todo!() } Statement::InsertMultiTable(stmt) => { self.bind_insert_multi_table(bind_context, stmt).await? diff --git a/src/query/sql/src/planner/binder/copy_into_table.rs b/src/query/sql/src/planner/binder/copy_into_table.rs index 78d7d0837b847..f66a171c24d45 100644 --- a/src/query/sql/src/planner/binder/copy_into_table.rs +++ b/src/query/sql/src/planner/binder/copy_into_table.rs @@ -67,10 +67,11 @@ use parking_lot::RwLock; use crate::binder::bind_query::MaxColumnPosition; use crate::binder::location::parse_uri_location; use crate::binder::Binder; +use crate::optimizer::SExpr; +use crate::plans::AppendSource; use crate::plans::CopyIntoTableMode; use crate::plans::CopyIntoTablePlan; use crate::plans::Plan; -use crate::plans::ValidationMode; use crate::BindContext; use crate::Metadata; use crate::NameResolutionContext; @@ -83,34 +84,35 @@ impl<'a> Binder { bind_context: &mut BindContext, stmt: &CopyIntoTableStmt, ) -> Result { - match &stmt.src { - CopyIntoTableSource::Location(location) => { - let mut plan = self - .bind_copy_into_table_common(bind_context, stmt, location, false) - .await?; - - // for copy from location, collect files explicitly - plan.collect_files(self.ctx.as_ref()).await?; - self.bind_copy_into_table_from_location(bind_context, plan) - .await - } - CopyIntoTableSource::Query(query) => { - self.init_cte(bind_context, &stmt.with)?; - - let mut max_column_position = MaxColumnPosition::new(); - query.drive(&mut max_column_position); - self.metadata - .write() - .set_max_column_position(max_column_position.max_pos); - let (select_list, location, alias) = check_transform_query(query)?; - let plan = self - .bind_copy_into_table_common(bind_context, stmt, location, true) - .await?; - - self.bind_copy_from_query_into_table(bind_context, plan, select_list, alias) - .await - } - } + // match &stmt.src { + // CopyIntoTableSource::Location(location) => { + // let mut plan = self + // .bind_copy_into_table_common(bind_context, stmt, location) + // .await?; + + // // for copy from location, collect files explicitly + // plan.collect_files(self.ctx.as_ref()).await?; + // self.bind_copy_into_table_from_location(bind_context, plan) + // .await + // } + // CopyIntoTableSource::Query(query) => { + // self.init_cte(bind_context, 
&stmt.with)?; + + // let mut max_column_position = MaxColumnPosition::new(); + // query.drive(&mut max_column_position); + // self.metadata + // .write() + // .set_max_column_position(max_column_position.max_pos); + // let (select_list, location, alias) = check_transform_query(query)?; + // let plan = self + // .bind_copy_into_table_common(bind_context, stmt, location) + // .await?; + + // self.bind_copy_from_query_into_table(bind_context, plan, select_list, alias) + // .await + // } + // } + todo!() } pub(crate) fn resolve_copy_pattern( @@ -138,23 +140,17 @@ impl<'a> Binder { bind_context: &mut BindContext, stmt: &CopyIntoTableStmt, location: &FileLocation, - is_transform: bool, - ) -> Result { + ) -> Result { let (catalog_name, database_name, table_name) = self.normalize_object_identifier_triple( &stmt.dst.catalog, &stmt.dst.database, &stmt.dst.table, ); - let catalog = self.ctx.get_catalog(&catalog_name).await?; - let catalog_info = catalog.info(); let table = self .ctx .get_table(&catalog_name, &database_name, &table_name) .await?; - let validation_mode = ValidationMode::from_str(stmt.validation_mode.as_str()) - .map_err(ErrorCode::SyntaxException)?; - let (mut stage_info, path) = resolve_file_location(self.ctx.as_ref(), location).await?; self.apply_copy_into_table_options(stmt, &mut stage_info) .await?; @@ -186,34 +182,31 @@ impl<'a> Binder { } else { None }; - - Ok(CopyIntoTablePlan { - catalog_info, - database_name, - table_name, - validation_mode, - is_transform, - no_file_to_copy: false, - from_attachment: false, - force: stmt.force, - stage_table_info: StageTableInfo { - schema: stage_schema, - files_info, - stage_info, - files_to_copy: None, - duplicated_files_detected: vec![], - is_select: false, - default_values, - copy_into_location_options: Default::default(), - }, - values_consts: vec![], - required_source_schema: required_values_schema.clone(), - required_values_schema: required_values_schema.clone(), - write_mode: CopyIntoTableMode::Copy, - query: None, - - enable_distributed: false, - }) + // source: crate::plans::AppendSource::Stage(Box::new(StageTableInfo { + // schema: stage_schema, + // files_info, + // stage_info, + // files_to_copy: None, + // duplicated_files_detected: vec![], + // is_select: false, + // default_values, + // copy_into_location_options: Default::default(), + // })), + + // let source = SExpr::create_leaf(Arc::new(plan)); + // let copy_into = CopyIntoTablePlan { + // catalog_name, + // database_name, + // table_name, + // force: stmt.force, + // values_consts: vec![], + // required_source_schema: required_values_schema.clone(), + // required_values_schema: required_values_schema.clone(), + // }; + // let copy_into_table = SExpr::create_unary(Arc::new(plan), source); + + // Ok() + todo!() } /// Bind COPY INFO FROM @@ -223,45 +216,46 @@ impl<'a> Binder { bind_ctx: &BindContext, plan: CopyIntoTablePlan, ) -> Result { - let use_query = matches!(&plan.stage_table_info.stage_info.file_format_params, - FileFormatParams::Parquet(fmt) if fmt.missing_field_as == NullAs::Error); - - if use_query { - let mut select_list = Vec::with_capacity(plan.required_source_schema.num_fields()); - for dest_field in plan.required_source_schema.fields().iter() { - let column = Expr::ColumnRef { - span: None, - column: ColumnRef { - database: None, - table: None, - column: AstColumnID::Name(Identifier::from_name( - None, - dest_field.name().to_string(), - )), - }, - }; - // cast types to variant, tuple will be rewrite as `json_object_keep_null` - let expr = if 
dest_field.data_type().remove_nullable() == DataType::Variant { - Expr::Cast { - span: None, - expr: Box::new(column), - target_type: TypeName::Variant, - pg_style: false, - } - } else { - column - }; - select_list.push(SelectTarget::AliasedExpr { - expr: Box::new(expr), - alias: None, - }); - } - - self.bind_copy_from_query_into_table(bind_ctx, plan, &select_list, &None) - .await - } else { - Ok(Plan::CopyIntoTable(Box::new(plan))) - } + // let use_query = matches!(&plan.source.as_stage().unwrap().stage_info.file_format_params, + // FileFormatParams::Parquet(fmt) if fmt.missing_field_as == NullAs::Error); + + // if use_query { + // let mut select_list = Vec::with_capacity(plan.required_source_schema.num_fields()); + // for dest_field in plan.required_source_schema.fields().iter() { + // let column = Expr::ColumnRef { + // span: None, + // column: ColumnRef { + // database: None, + // table: None, + // column: AstColumnID::Name(Identifier::from_name( + // None, + // dest_field.name().to_string(), + // )), + // }, + // }; + // // cast types to variant, tuple will be rewrite as `json_object_keep_null` + // let expr = if dest_field.data_type().remove_nullable() == DataType::Variant { + // Expr::Cast { + // span: None, + // expr: Box::new(column), + // target_type: TypeName::Variant, + // pg_style: false, + // } + // } else { + // column + // }; + // select_list.push(SelectTarget::AliasedExpr { + // expr: Box::new(expr), + // alias: None, + // }); + // } + + // self.bind_copy_from_query_into_table(bind_ctx, plan, &select_list, &None) + // .await + // } else { + // Ok(Plan::CopyIntoTable(Box::new(plan))) + // } + todo!() } #[async_backtrace::framed] @@ -323,9 +317,6 @@ impl<'a> Binder { .await? }; - let catalog = self.ctx.get_catalog(&catalog_name).await?; - let catalog_info = catalog.info(); - let thread_num = self.ctx.get_settings().get_max_threads()? as usize; let (stage_info, files_info) = self.bind_attachment(attachment).await?; @@ -338,7 +329,7 @@ impl<'a> Binder { // as the vanilla Copy-Into does. // thus, we do not care about the "duplicated_files_detected", just set it to empty vector. 
let files_to_copy = list_stage_files(&stage_info, &files_info, thread_num, None).await?; - let duplicated_files_detected = vec![]; + // let duplicated_files_detected = vec![]; let stage_schema = infer_table_schema(&data_schema)?; @@ -346,36 +337,31 @@ impl<'a> Binder { .prepare_default_values(bind_context, &data_schema) .await?; - let plan = CopyIntoTablePlan { - catalog_info, - database_name, - table_name, - no_file_to_copy: false, - from_attachment: true, - required_source_schema: data_schema.clone(), - required_values_schema, - values_consts: const_columns, - force: true, - stage_table_info: StageTableInfo { - schema: stage_schema, - files_info, - stage_info, - files_to_copy: Some(files_to_copy), - duplicated_files_detected, - is_select: false, - default_values: Some(default_values), - copy_into_location_options: Default::default(), - }, - write_mode, - query: None, - validation_mode: ValidationMode::None, - - enable_distributed: false, - is_transform: false, - }; - - self.bind_copy_into_table_from_location(bind_context, plan) - .await + // let plan = CopyIntoTablePlan { + // catalog_name, + // database_name, + // table_name, + // no_file_to_copy: false, + // required_source_schema: data_schema.clone(), + // required_values_schema, + // values_consts: const_columns, + // force: true, + // source: crate::plans::AppendSource::Stage(Box::new(StageTableInfo { + // schema: stage_schema, + // files_info, + // stage_info, + // files_to_copy: Some(files_to_copy), + // duplicated_files_detected, + // is_select: false, + // default_values: Some(default_values), + // copy_into_location_options: Default::default(), + // })), + // enable_distributed: false, + // }; + + // self.bind_copy_into_table_from_location(bind_context, plan) + // .await + todo!() } /// Bind COPY INTO
FROM @@ -387,93 +373,94 @@ impl<'a> Binder { select_list: &'a [SelectTarget], alias: &Option, ) -> Result { - plan.collect_files(self.ctx.as_ref()).await?; - if plan.no_file_to_copy { - return Ok(Plan::CopyIntoTable(Box::new(plan))); - } - - let table_ctx = self.ctx.clone(); - let (s_expr, mut from_context) = self - .bind_stage_table( - table_ctx, - bind_context, - plan.stage_table_info.stage_info.clone(), - plan.stage_table_info.files_info.clone(), - alias, - plan.stage_table_info.files_to_copy.clone(), - ) - .await?; - - // Generate an analyzed select list with from context - let select_list = self.normalize_select_list(&mut from_context, select_list)?; - - for item in select_list.items.iter() { - if !self.check_allowed_scalar_expr_with_subquery_for_copy_table(&item.scalar)? { - // in fact, if there is a join, we will stop in `check_transform_query()` - return Err(ErrorCode::SemanticError( - "copy into table source can't contain window|aggregate|join functions" - .to_string(), - )); - }; - } - let (scalar_items, projections) = self.analyze_projection( - &from_context.aggregate_info, - &from_context.windows, - &select_list, - )?; - - if projections.len() != plan.required_source_schema.num_fields() { - return Err(ErrorCode::BadArguments(format!( - "Number of columns in select list ({}) does not match that of the corresponding table ({})", - projections.len(), - plan.required_source_schema.num_fields(), - ))); - } - - let mut s_expr = - self.bind_projection(&mut from_context, &projections, &scalar_items, s_expr)?; - - // rewrite async function and udf - s_expr = self.rewrite_udf(&mut from_context, s_expr)?; - - let mut output_context = BindContext::new(); - output_context.parent = from_context.parent; - output_context.columns = from_context.columns; - - // disable variant check to allow copy invalid JSON into tables - let disable_variant_check = plan - .stage_table_info - .stage_info - .copy_options - .disable_variant_check; - if disable_variant_check { - let hints = Hint { - hints_list: vec![HintItem { - name: Identifier::from_name(None, "disable_variant_check"), - expr: Expr::Literal { - span: None, - value: Literal::UInt64(1), - }, - }], - }; - if let Some(e) = self.opt_hints_set_var(&mut output_context, &hints).err() { - warn!( - "In COPY resolve optimize hints {:?} failed, err: {:?}", - hints, e - ); - } - } - - plan.query = Some(Box::new(Plan::Query { - s_expr: Box::new(s_expr), - metadata: self.metadata.clone(), - bind_context: Box::new(output_context), - rewrite_kind: None, - ignore_result: false, - formatted_ast: None, - })); - - Ok(Plan::CopyIntoTable(Box::new(plan))) + // plan.collect_files(self.ctx.as_ref()).await?; + // if plan.no_file_to_copy { + // return Ok(Plan::CopyIntoTable(Box::new(plan))); + // } + + // let stage_table_info = plan.source.as_stage().unwrap(); + // let table_ctx = self.ctx.clone(); + // let (s_expr, mut from_context) = self + // .bind_stage_table( + // table_ctx, + // bind_context, + // stage_table_info.stage_info.clone(), + // stage_table_info.files_info.clone(), + // alias, + // stage_table_info.files_to_copy.clone(), + // ) + // .await?; + + // // Generate an analyzed select list with from context + // let select_list = self.normalize_select_list(&mut from_context, select_list)?; + + // for item in select_list.items.iter() { + // if !self.check_allowed_scalar_expr_with_subquery_for_copy_table(&item.scalar)? 
{ + // // in fact, if there is a join, we will stop in `check_transform_query()` + // return Err(ErrorCode::SemanticError( + // "copy into table source can't contain window|aggregate|join functions" + // .to_string(), + // )); + // }; + // } + // let (scalar_items, projections) = self.analyze_projection( + // &from_context.aggregate_info, + // &from_context.windows, + // &select_list, + // )?; + + // if projections.len() != plan.required_source_schema.num_fields() { + // return Err(ErrorCode::BadArguments(format!( + // "Number of columns in select list ({}) does not match that of the corresponding table ({})", + // projections.len(), + // plan.required_source_schema.num_fields(), + // ))); + // } + + // let mut s_expr = + // self.bind_projection(&mut from_context, &projections, &scalar_items, s_expr)?; + + // // rewrite async function and udf + // s_expr = self.rewrite_udf(&mut from_context, s_expr)?; + + // let mut output_context = BindContext::new(); + // output_context.parent = from_context.parent; + // output_context.columns = from_context.columns; + + // // disable variant check to allow copy invalid JSON into tables + // let disable_variant_check = stage_table_info + // .stage_info + // .copy_options + // .disable_variant_check; + // if disable_variant_check { + // let hints = Hint { + // hints_list: vec![HintItem { + // name: Identifier::from_name(None, "disable_variant_check"), + // expr: Expr::Literal { + // span: None, + // value: Literal::UInt64(1), + // }, + // }], + // }; + // if let Some(e) = self.opt_hints_set_var(&mut output_context, &hints).err() { + // warn!( + // "In COPY resolve optimize hints {:?} failed, err: {:?}", + // hints, e + // ); + // } + // } + + // plan.source = AppendSource::Query(Box::new(Plan::Query { + // s_expr: Box::new(s_expr), + // metadata: self.metadata.clone(), + // bind_context: Box::new(output_context), + // rewrite_kind: None, + // ignore_result: false, + // formatted_ast: None, + // })); + + // Ok(Plan::CopyIntoTable(Box::new(plan))) + todo!() } #[async_backtrace::framed] diff --git a/src/query/sql/src/planner/binder/insert.rs b/src/query/sql/src/planner/binder/insert.rs index c830878ea7e02..be0166e4b4353 100644 --- a/src/query/sql/src/planner/binder/insert.rs +++ b/src/query/sql/src/planner/binder/insert.rs @@ -20,6 +20,7 @@ use databend_common_ast::ast::InsertStmt; use databend_common_ast::ast::Statement; use databend_common_exception::ErrorCode; use databend_common_exception::Result; +use databend_common_expression::DataSchema; use databend_common_expression::TableSchema; use databend_common_expression::TableSchemaRefExt; @@ -27,6 +28,7 @@ use super::util::TableIdentifier; use crate::binder::Binder; use crate::normalize_identifier; use crate::plans::CopyIntoTableMode; +use crate::plans::CopyIntoTablePlan; use crate::plans::Insert; use crate::plans::InsertInputSource; use crate::plans::InsertValue; @@ -67,8 +69,106 @@ impl Binder { Ok(TableSchemaRefExt::create(fields)) } + // #[async_backtrace::framed] + // pub(in crate::planner::binder) async fn bind_insert( + // &mut self, + // bind_context: &mut BindContext, + // stmt: &InsertStmt, + // ) -> Result { + // let InsertStmt { + // with, + // catalog, + // database, + // table, + // columns, + // source, + // overwrite, + // .. 
+ // } = stmt; + + // self.init_cte(bind_context, with)?; + + // let table_identifier = TableIdentifier::new(self, catalog, database, table, &None); + // let (catalog_name, database_name, table_name) = ( + // table_identifier.catalog_name(), + // table_identifier.database_name(), + // table_identifier.table_name(), + // ); + + // let table = self + // .ctx + // .get_table(&catalog_name, &database_name, &table_name) + // .await + // .map_err(|err| table_identifier.not_found_suggest_error(err))?; + + // let schema = self.schema_project(&table.schema(), columns)?; + + // let input_source: Result = match source.clone() { + // InsertSource::Values { rows } => { + // let mut new_rows = Vec::with_capacity(rows.len()); + // for row in rows { + // let new_row = bind_context + // .exprs_to_scalar( + // &row, + // &Arc::new(schema.clone().into()), + // self.ctx.clone(), + // &self.name_resolution_ctx, + // self.metadata.clone(), + // ) + // .await?; + // new_rows.push(new_row); + // } + // Ok(InsertInputSource::Values(InsertValue::Values { + // rows: new_rows, + // })) + // } + // InsertSource::RawValues { rest_str, start } => { + // let values_str = rest_str.trim_end_matches(';').trim_start().to_owned(); + // match self.ctx.get_stage_attachment() { + // Some(attachment) => { + // return self + // .bind_copy_from_attachment( + // bind_context, + // attachment, + // catalog_name, + // database_name, + // table_name, + // Arc::new(schema.into()), + // &values_str, + // CopyIntoTableMode::Insert { + // overwrite: *overwrite, + // }, + // ) + // .await; + // } + // None => Ok(InsertInputSource::Values(InsertValue::RawValues { + // data: rest_str, + // start, + // })), + // } + // } + // InsertSource::Select { query } => { + // let statement = Statement::Query(query); + // let select_plan = self.bind_statement(bind_context, &statement).await?; + // Ok(InsertInputSource::SelectPlan(Box::new(select_plan))) + // } + // }; + + // let plan = Insert { + // catalog: catalog_name.to_string(), + // database: database_name.to_string(), + // table: table_name, + // schema, + // overwrite: *overwrite, + // source: input_source?, + // table_info: None, + // }; + + // Ok(Plan::Insert(Box::new(plan))) + // } + #[async_backtrace::framed] - pub(in crate::planner::binder) async fn bind_insert( + pub(in crate::planner::binder) async fn bind_insert_to_copy( &mut self, bind_context: &mut BindContext, stmt: &InsertStmt, @@ -83,7 +183,6 @@ impl Binder { overwrite, .. 
        } = stmt;
-
        self.init_cte(bind_context, with)?;

        let table_identifier = TableIdentifier::new(self, catalog, database, table, &None);
        let (catalog_name, database_name, table_name) = (
            table_identifier.catalog_name(),
            table_identifier.database_name(),
            table_identifier.table_name(),
        );

        let table = self
            .ctx
            .get_table(&catalog_name, &database_name, &table_name)
            .await
            .map_err(|err| table_identifier.not_found_suggest_error(err))?;

-        let schema = self.schema_project(&table.schema(), columns)?;
-
-        let input_source: Result<InsertInputSource> = match source.clone() {
-            InsertSource::Values { rows } => {
-                let mut new_rows = Vec::with_capacity(rows.len());
-                for row in rows {
-                    let new_row = bind_context
-                        .exprs_to_scalar(
-                            &row,
-                            &Arc::new(schema.clone().into()),
-                            self.ctx.clone(),
-                            &self.name_resolution_ctx,
-                            self.metadata.clone(),
-                        )
-                        .await?;
-                    new_rows.push(new_row);
-                }
-                Ok(InsertInputSource::Values(InsertValue::Values {
-                    rows: new_rows,
-                }))
-            }
-            InsertSource::RawValues { rest_str, start } => {
-                let values_str = rest_str.trim_end_matches(';').trim_start().to_owned();
-                match self.ctx.get_stage_attachment() {
-                    Some(attachment) => {
-                        return self
-                            .bind_copy_from_attachment(
-                                bind_context,
-                                attachment,
-                                catalog_name,
-                                database_name,
-                                table_name,
-                                Arc::new(schema.into()),
-                                &values_str,
-                                CopyIntoTableMode::Insert {
-                                    overwrite: *overwrite,
-                                },
-                            )
-                            .await;
-                    }
-                    None => Ok(InsertInputSource::Values(InsertValue::RawValues {
-                        data: rest_str,
-                        start,
-                    })),
-                }
-            }
-            InsertSource::Select { query } => {
-                let statement = Statement::Query(query);
-                let select_plan = self.bind_statement(bind_context, &statement).await?;
-                Ok(InsertInputSource::SelectPlan(Box::new(select_plan)))
-            }
-        };
-
-        let plan = Insert {
-            catalog: catalog_name.to_string(),
-            database: database_name.to_string(),
-            table: table_name,
-            schema,
-            overwrite: *overwrite,
-            source: input_source?,
-            table_info: None,
-        };
+        let schema: Arc<DataSchema> =
+            Arc::new(self.schema_project(&table.schema(), columns)?.into());

+        // let plan = CopyIntoTablePlan {
+        //     no_file_to_copy: false,
+        //     catalog_name,
+        //     database_name,
+        //     table_name,
+        //     required_values_schema: schema.clone(),
+        //     values_consts: todo!(),
+        //     required_source_schema: schema,
+        //     force: todo!(),
+        //     enable_distributed: false,
+        //     source: todo!(),
+        // };

-        Ok(Plan::Insert(Box::new(plan)))
+        // Ok(Plan::CopyIntoTable(Box::new(plan)))
+        todo!()
    }
}
diff --git a/src/query/sql/src/planner/format/display_plan.rs b/src/query/sql/src/planner/format/display_plan.rs
index 543942bab07e0..62029933d0bea 100644
--- a/src/query/sql/src/planner/format/display_plan.rs
+++ b/src/query/sql/src/planner/format/display_plan.rs
@@ -40,7 +40,7 @@ impl Plan {
            Plan::ExplainSyntax { .. } => Ok("ExplainSyntax".to_string()),
            Plan::ExplainAnalyze { .. } => Ok("ExplainAnalyze".to_string()),

-            Plan::CopyIntoTable(_) => Ok("CopyIntoTable".to_string()),
+            Plan::CopyIntoTable { .. } => Ok("CopyIntoTable".to_string()),
            Plan::CopyIntoLocation(_) => Ok("CopyIntoLocation".to_string()),

            // catalog
@@ -109,8 +109,6 @@ impl Plan {
            Plan::DropVirtualColumn(_) => Ok("DropVirtualColumn".to_string()),
            Plan::RefreshVirtualColumn(_) => Ok("RefreshVirtualColumn".to_string()),

-            // Insert
-            Plan::Insert(_) => Ok("Insert".to_string()),
            Plan::InsertMultiTable(_) => Ok("InsertMultiTable".to_string()),
            Plan::Replace(_) => Ok("Replace".to_string()),
            Plan::DataMutation { s_expr, ..
} => format_merge_into(s_expr), diff --git a/src/query/sql/src/planner/optimizer/optimizer.rs b/src/query/sql/src/planner/optimizer/optimizer.rs index cca17e8a9983a..f20ef7dc27eb9 100644 --- a/src/query/sql/src/planner/optimizer/optimizer.rs +++ b/src/query/sql/src/planner/optimizer/optimizer.rs @@ -286,40 +286,40 @@ pub async fn optimize(mut opt_ctx: OptimizerContext, plan: Plan) -> Result from: Box::new(Box::pin(optimize(opt_ctx, *from)).await?), options, })), - Plan::CopyIntoTable(mut plan) if !plan.no_file_to_copy => { - plan.enable_distributed = opt_ctx.enable_distributed_optimization - && opt_ctx - .table_ctx - .get_settings() - .get_enable_distributed_copy()?; - info!( - "after optimization enable_distributed_copy? : {}", - plan.enable_distributed - ); - - if let Some(p) = &plan.query { - let optimized_plan = optimize(opt_ctx.clone(), *p.clone()).await?; - plan.query = Some(Box::new(optimized_plan)); - } - Ok(Plan::CopyIntoTable(plan)) - } + // Plan::CopyIntoTable(mut plan) if !plan.no_file_to_copy => { + // plan.enable_distributed = opt_ctx.enable_distributed_optimization + // && opt_ctx + // .table_ctx + // .get_settings() + // .get_enable_distributed_copy()?; + // info!( + // "after optimization enable_distributed_copy? : {}", + // plan.enable_distributed + // ); + + // // if let Some(p) = &plan.query { + // // let optimized_plan = optimize(opt_ctx.clone(), *p.clone()).await?; + // // plan.query = Some(Box::new(optimized_plan)); + // // } + // Ok(Plan::CopyIntoTable(plan)) + // } Plan::DataMutation { s_expr, .. } => optimize_mutation(opt_ctx, *s_expr).await, // distributed insert will be optimized in `physical_plan_builder` - Plan::Insert(mut plan) => { - match plan.source { - InsertInputSource::SelectPlan(p) => { - let optimized_plan = optimize(opt_ctx.clone(), *p.clone()).await?; - plan.source = InsertInputSource::SelectPlan(Box::new(optimized_plan)); - } - InsertInputSource::Stage(p) => { - let optimized_plan = optimize(opt_ctx.clone(), *p.clone()).await?; - plan.source = InsertInputSource::Stage(Box::new(optimized_plan)); - } - _ => {} - } - Ok(Plan::Insert(plan)) - } + // Plan::Insert(mut plan) => { + // match plan.source { + // InsertInputSource::SelectPlan(p) => { + // let optimized_plan = optimize(opt_ctx.clone(), *p.clone()).await?; + // plan.source = InsertInputSource::SelectPlan(Box::new(optimized_plan)); + // } + // InsertInputSource::Stage(p) => { + // let optimized_plan = optimize(opt_ctx.clone(), *p.clone()).await?; + // plan.source = InsertInputSource::Stage(Box::new(optimized_plan)); + // } + // _ => {} + // } + // Ok(Plan::Insert(plan)) + // } Plan::InsertMultiTable(mut plan) => { plan.input_source = optimize(opt_ctx.clone(), plan.input_source.clone()).await?; Ok(Plan::InsertMultiTable(plan)) diff --git a/src/query/sql/src/planner/plans/copy_into_table.rs b/src/query/sql/src/planner/plans/copy_into_table.rs index d11050411cfdf..9196ede8ffceb 100644 --- a/src/query/sql/src/planner/plans/copy_into_table.rs +++ b/src/query/sql/src/planner/plans/copy_into_table.rs @@ -33,11 +33,13 @@ use databend_common_expression::DataSchemaRefExt; use databend_common_expression::Scalar; use databend_common_meta_app::principal::COPY_MAX_FILES_COMMIT_MSG; use databend_common_meta_app::principal::COPY_MAX_FILES_PER_COMMIT; -use databend_common_meta_app::schema::CatalogInfo; use databend_common_metrics::storage::*; use databend_common_storage::init_stage_operator; +use databend_common_storage::StageFileInfo; +use enum_as_inner::EnumAsInner; use log::info; +use super::InsertValue; use 
crate::plans::Plan;

 #[derive(PartialEq, Eq, Clone, Debug, serde::Serialize, serde::Deserialize)]
@@ -115,169 +117,180 @@ impl CopyIntoTableMode {

 #[derive(Clone)]
 pub struct CopyIntoTablePlan {
-    pub no_file_to_copy: bool,
-
-    pub catalog_info: Arc<CatalogInfo>,
+    pub catalog_name: String,
     pub database_name: String,
     pub table_name: String,
-    pub from_attachment: bool,
-
     pub required_values_schema: DataSchemaRef,
-    // ... into table(<columns>) ..  -> <columns>
     pub values_consts: Vec<Scalar>,
-    // (1, ?, 'a', ?) -> (1, 'a')
-    pub required_source_schema: DataSchemaRef, // (1, ?, 'a', ?) -> (?, ?)
-
-    pub write_mode: CopyIntoTableMode,
-    pub validation_mode: ValidationMode,
+    pub required_source_schema: DataSchemaRef,
     pub force: bool,
+}

-    pub stage_table_info: StageTableInfo,
-    pub query: Option<Box<Plan>>,
-    // query may be Some even if is_transform=false
-    pub is_transform: bool,
-
-    pub enable_distributed: bool,
+#[derive(Clone, EnumAsInner)]
+pub enum AppendSource {
+    Query(Box<Plan>),
+    Stage(Box<StageTableInfo>),
+    Values(InsertValue),
 }

+impl AppendSource {
+    pub fn files_to_copy(&self) -> Vec<StageFileInfo> {
+        match self {
+            AppendSource::Stage(stage) => stage.files_to_copy.clone().unwrap_or_default(),
+            _ => vec![],
+        }
+    }
+
+    pub fn duplicated_files_detected(&self) -> Vec<String> {
+        match self {
+            AppendSource::Stage(stage) => stage.duplicated_files_detected.clone(),
+            _ => vec![],
+        }
+    }
+}
+
 impl CopyIntoTablePlan {
     pub async fn collect_files(&mut self, ctx: &dyn TableContext) -> Result<()> {
-        ctx.set_status_info("begin to list files");
-        let start = Instant::now();
-
-        let stage_table_info = &self.stage_table_info;
-        let max_files = stage_table_info.stage_info.copy_options.max_files;
-        let max_files = if max_files == 0 {
-            None
-        } else {
-            Some(max_files)
-        };
-
-        let thread_num = ctx.get_settings().get_max_threads()? as usize;
-        let operator = init_stage_operator(&stage_table_info.stage_info)?;
-        let all_source_file_infos = if operator.info().native_capability().blocking {
-            if self.force {
-                stage_table_info
-                    .files_info
-                    .blocking_list(&operator, max_files)
-            } else {
-                stage_table_info.files_info.blocking_list(&operator, None)
-            }
-        } else if self.force {
-            stage_table_info
-                .files_info
-                .list(&operator, thread_num, max_files)
-                .await
-        } else {
-            stage_table_info
-                .files_info
-                .list(&operator, thread_num, None)
-                .await
-        }?;
-
-        let num_all_files = all_source_file_infos.len();
-
-        let end_get_all_source = Instant::now();
-        let cost_get_all_files = end_get_all_source.duration_since(start).as_millis();
-        metrics_inc_copy_collect_files_get_all_source_files_milliseconds(cost_get_all_files as u64);
-
-        ctx.set_status_info(&format!(
-            "end list files: got {} files, time used {:?}",
-            num_all_files,
-            start.elapsed()
-        ));
-
-        let (need_copy_file_infos, duplicated) = if self.force {
-            if !self.stage_table_info.stage_info.copy_options.purge
-                && all_source_file_infos.len() > COPY_MAX_FILES_PER_COMMIT
-            {
-                return Err(ErrorCode::Internal(COPY_MAX_FILES_COMMIT_MSG));
-            }
-            info!(
-                "force mode, ignore file filtering. ({}.{})",
-                &self.database_name, &self.table_name
-            );
-            (all_source_file_infos, vec![])
-        } else {
-            // Status.
- ctx.set_status_info("begin filtering out copied files"); - - let filter_start = Instant::now(); - let FilteredCopyFiles { - files_to_copy, - duplicated_files, - } = ctx - .filter_out_copied_files( - self.catalog_info.catalog_name(), - &self.database_name, - &self.table_name, - &all_source_file_infos, - max_files, - ) - .await?; - ctx.set_status_info(&format!( - "end filtering out copied files: {}, time used {:?}", - num_all_files, - filter_start.elapsed() - )); - - let end_filter_out = Instant::now(); - let cost_filter_out = end_filter_out - .duration_since(end_get_all_source) - .as_millis(); - metrics_inc_copy_filter_out_copied_files_entire_milliseconds(cost_filter_out as u64); - - (files_to_copy, duplicated_files) - }; - - let num_copied_files = need_copy_file_infos.len(); - let copied_bytes: u64 = need_copy_file_infos.iter().map(|i| i.size).sum(); - - info!( - "collect files with max_files={:?} finished, need to copy {} files, {} bytes; skip {} duplicated files, time used:{:?}", - max_files, - need_copy_file_infos.len(), - copied_bytes, - num_all_files - num_copied_files, - start.elapsed() - ); - - if need_copy_file_infos.is_empty() { - self.no_file_to_copy = true; +impl AppendSource { + pub fn files_to_copy(&self) -> Vec { + match self { + AppendSource::Stage(stage) => stage.files_to_copy.clone().unwrap_or_default(), + _ => vec![], } + } - self.stage_table_info.files_to_copy = Some(need_copy_file_infos); - self.stage_table_info.duplicated_files_detected = duplicated; + pub fn duplicated_files_detected(&self) -> Vec { + match self { + AppendSource::Stage(stage) => stage.duplicated_files_detected.clone(), + _ => vec![], + } + } +} - Ok(()) +impl CopyIntoTablePlan { + pub async fn collect_files(&mut self, ctx: &dyn TableContext) -> Result<()> { + // ctx.set_status_info("begin to list files"); + // let start = Instant::now(); + + // let stage_table_info = self.source.as_stage().unwrap(); + // let max_files = stage_table_info.stage_info.copy_options.max_files; + // let max_files = if max_files == 0 { + // None + // } else { + // Some(max_files) + // }; + + // let thread_num = ctx.get_settings().get_max_threads()? as usize; + // let operator = init_stage_operator(&stage_table_info.stage_info)?; + // let all_source_file_infos = if operator.info().native_capability().blocking { + // if self.force { + // stage_table_info + // .files_info + // .blocking_list(&operator, max_files) + // } else { + // stage_table_info.files_info.blocking_list(&operator, None) + // } + // } else if self.force { + // stage_table_info + // .files_info + // .list(&operator, thread_num, max_files) + // .await + // } else { + // stage_table_info + // .files_info + // .list(&operator, thread_num, None) + // .await + // }?; + + // let num_all_files = all_source_file_infos.len(); + + // let end_get_all_source = Instant::now(); + // let cost_get_all_files = end_get_all_source.duration_since(start).as_millis(); + // metrics_inc_copy_collect_files_get_all_source_files_milliseconds(cost_get_all_files as u64); + + // ctx.set_status_info(&format!( + // "end list files: got {} files, time used {:?}", + // num_all_files, + // start.elapsed() + // )); + + // let (need_copy_file_infos, duplicated) = if self.force { + // if !stage_table_info.stage_info.copy_options.purge + // && all_source_file_infos.len() > COPY_MAX_FILES_PER_COMMIT + // { + // return Err(ErrorCode::Internal(COPY_MAX_FILES_COMMIT_MSG)); + // } + // info!( + // "force mode, ignore file filtering. 
({}.{})", + // &self.database_name, &self.table_name + // ); + // (all_source_file_infos, vec![]) + // } else { + // // Status. + // ctx.set_status_info("begin filtering out copied files"); + + // let filter_start = Instant::now(); + // let FilteredCopyFiles { + // files_to_copy, + // duplicated_files, + // } = ctx + // .filter_out_copied_files( + // &self.catalog_name, + // &self.database_name, + // &self.table_name, + // &all_source_file_infos, + // max_files, + // ) + // .await?; + // ctx.set_status_info(&format!( + // "end filtering out copied files: {}, time used {:?}", + // num_all_files, + // filter_start.elapsed() + // )); + + // let end_filter_out = Instant::now(); + // let cost_filter_out = end_filter_out + // .duration_since(end_get_all_source) + // .as_millis(); + // metrics_inc_copy_filter_out_copied_files_entire_milliseconds(cost_filter_out as u64); + + // (files_to_copy, duplicated_files) + // }; + + // let num_copied_files = need_copy_file_infos.len(); + // let copied_bytes: u64 = need_copy_file_infos.iter().map(|i| i.size).sum(); + + // info!( + // "collect files with max_files={:?} finished, need to copy {} files, {} bytes; skip {} duplicated files, time used:{:?}", + // max_files, + // need_copy_file_infos.len(), + // copied_bytes, + // num_all_files - num_copied_files, + // start.elapsed() + // ); + + // if need_copy_file_infos.is_empty() { + // self.no_file_to_copy = true; + // } + + // let stage_table_info = self.source.as_stage_mut().unwrap(); + + // stage_table_info.files_to_copy = Some(need_copy_file_infos); + // stage_table_info.duplicated_files_detected = duplicated; + + // Ok(()) + todo!() } } impl Debug for CopyIntoTablePlan { fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { - let CopyIntoTablePlan { - catalog_info, - database_name, - table_name, - no_file_to_copy, - validation_mode, - force, - stage_table_info, - query, - .. - } = self; - write!( - f, - "Copy into {:}.{database_name:}.{table_name:}", - catalog_info.catalog_name() - )?; - write!(f, ", no_file_to_copy: {no_file_to_copy:?}")?; - write!(f, ", validation_mode: {validation_mode:?}")?; - write!(f, ", from: {stage_table_info:?}")?; - write!(f, " force: {force}")?; - write!(f, " is_from: {force}")?; - write!(f, " query: {query:?}")?; - Ok(()) + // let CopyIntoTablePlan { + // catalog_name: catalog_info, + // database_name, + // table_name, + // no_file_to_copy, + // validation_mode, + // force, + // stage_table_info, + // query, + // .. 
+        // } = self;
+        // write!(
+        //     f,
+        //     "Copy into {:}.{database_name:}.{table_name:}",
+        //     &catalog_info
+        // )?;
+        // write!(f, ", no_file_to_copy: {no_file_to_copy:?}")?;
+        // write!(f, ", validation_mode: {validation_mode:?}")?;
+        // write!(f, ", from: {stage_table_info:?}")?;
+        // write!(f, " force: {force}")?;
+        // write!(f, " is_from: {force}")?;
+        // write!(f, " query: {query:?}")?;
+        // Ok(())
+        todo!()
    }
}

@@ -301,10 +314,11 @@ impl CopyIntoTablePlan {
    }

    pub fn schema(&self) -> DataSchemaRef {
-        if self.from_attachment {
-            Arc::new(DataSchema::empty())
-        } else {
-            Self::copy_into_table_schema()
-        }
+        // if self.from_attachment {
+        //     Arc::new(DataSchema::empty())
+        // } else {
+        //     Self::copy_into_table_schema()
+        // }
+        todo!()
    }
}
diff --git a/src/query/sql/src/planner/plans/insert.rs b/src/query/sql/src/planner/plans/insert.rs
index f20a2c0830ee2..37ae25f41240b 100644
--- a/src/query/sql/src/planner/plans/insert.rs
+++ b/src/query/sql/src/planner/plans/insert.rs
@@ -27,7 +27,6 @@ use serde::Deserialize;
 use serde::Serialize;

 use super::Plan;
-use crate::plans::CopyIntoTablePlan;

 #[derive(Clone, Debug, EnumAsInner)]
 pub enum InsertInputSource {
@@ -154,55 +153,53 @@ pub(crate) fn format_insert_source(
            )
            .format_pretty()?),
        },
-        InsertInputSource::Stage(plan) => match *plan.clone() {
-            Plan::CopyIntoTable(copy_plan) => {
-                let CopyIntoTablePlan {
-                    no_file_to_copy,
-                    from_attachment,
-                    required_values_schema,
-                    required_source_schema,
-                    write_mode,
-                    validation_mode,
-                    force,
-                    stage_table_info,
-                    enable_distributed,
-                    ..
-                } = &*copy_plan;
-                let required_values_schema = required_values_schema
-                    .fields()
-                    .iter()
-                    .map(|field| field.name().to_string())
-                    .collect::<Vec<_>>()
-                    .join(",");
-                let required_source_schema = required_source_schema
-                    .fields()
-                    .iter()
-                    .map(|field| field.name().to_string())
-                    .collect::<Vec<_>>()
-                    .join(",");
-                let stage_node = vec![
-                    FormatTreeNode::new(format!("no_file_to_copy: {no_file_to_copy}")),
-                    FormatTreeNode::new(format!("from_attachment: {from_attachment}")),
-                    FormatTreeNode::new(format!(
-                        "required_values_schema: [{required_values_schema}]"
-                    )),
-                    FormatTreeNode::new(format!(
-                        "required_source_schema: [{required_source_schema}]"
-                    )),
-                    FormatTreeNode::new(format!("write_mode: {write_mode}")),
-                    FormatTreeNode::new(format!("validation_mode: {validation_mode}")),
-                    FormatTreeNode::new(format!("force: {force}")),
-                    FormatTreeNode::new(format!("stage_table_info: {stage_table_info}")),
-                    FormatTreeNode::new(format!("enable_distributed: {enable_distributed}")),
-                ];
-                children.extend(stage_node);
-                Ok(
-                    FormatTreeNode::with_children(format!("{plan_name} (stage):"), children)
-                        .format_pretty()?,
-                )
-            }
-            _ => unreachable!("plan in InsertInputSource::Stag must be CopyIntoTable"),
-        },
+        _ => todo!()
+        // InsertInputSource::Stage(plan) => match *plan.clone() {
+        //     Plan::CopyIntoTable(copy_plan) => {
+        //         let CopyIntoTablePlan {
+        //             no_file_to_copy,
+        //             required_values_schema,
+        //             required_source_schema,
+        //             force,
+        //             source,
+        //             enable_distributed,
+        //             ..
+ // } = &*copy_plan; + // let required_values_schema = required_values_schema + // .fields() + // .iter() + // .map(|field| field.name().to_string()) + // .collect::>() + // .join(","); + // let required_source_schema = required_source_schema + // .fields() + // .iter() + // .map(|field| field.name().to_string()) + // .collect::>() + // .join(","); + // let stage_node = vec![ + // FormatTreeNode::new(format!("no_file_to_copy: {no_file_to_copy}")), + // FormatTreeNode::new(format!("from_attachment: {from_attachment}")), + // FormatTreeNode::new(format!( + // "required_values_schema: [{required_values_schema}]" + // )), + // FormatTreeNode::new(format!( + // "required_source_schema: [{required_source_schema}]" + // )), + // FormatTreeNode::new(format!("write_mode: {write_mode}")), + // FormatTreeNode::new(format!("validation_mode: {validation_mode}")), + // FormatTreeNode::new(format!("force: {force}")), + // FormatTreeNode::new(format!("stage_table_info: {stage_table_info}")), + // FormatTreeNode::new(format!("enable_distributed: {enable_distributed}")), + // ]; + // children.extend(stage_node); + // Ok( + // FormatTreeNode::with_children(format!("{plan_name} (stage):"), children) + // .format_pretty()?, + // ) + // } + // _ => unreachable!("plan in InsertInputSource::Stag must be CopyIntoTable"), + // }, } } diff --git a/src/query/sql/src/planner/plans/plan.rs b/src/query/sql/src/planner/plans/plan.rs index 0a3efacdc7536..151e052e00f10 100644 --- a/src/query/sql/src/planner/plans/plan.rs +++ b/src/query/sql/src/planner/plans/plan.rs @@ -42,7 +42,6 @@ use crate::plans::AlterViewPlan; use crate::plans::AlterVirtualColumnPlan; use crate::plans::AnalyzeTablePlan; use crate::plans::CallProcedurePlan; -use crate::plans::CopyIntoTableMode; use crate::plans::CopyIntoTablePlan; use crate::plans::CreateCatalogPlan; use crate::plans::CreateConnectionPlan; @@ -231,9 +230,6 @@ pub enum Plan { s_expr: Box, need_purge: bool, }, - - // Insert - Insert(Box), InsertMultiTable(Box), Replace(Box), DataMutation { @@ -242,7 +238,9 @@ pub enum Plan { metadata: MetadataRef, }, - CopyIntoTable(Box), + CopyIntoTable { + s_expr: Box, + }, CopyIntoLocation(CopyIntoLocationPlan), // Views @@ -413,24 +411,25 @@ pub enum RewriteKind { impl Plan { pub fn kind(&self) -> QueryKind { - match self { - Plan::Query { .. } => QueryKind::Query, - Plan::CopyIntoTable(copy_plan) => match copy_plan.write_mode { - CopyIntoTableMode::Insert { .. } => QueryKind::Insert, - _ => QueryKind::CopyIntoTable, - }, - Plan::Explain { .. } - | Plan::ExplainAnalyze { .. } - | Plan::ExplainAst { .. } - | Plan::ExplainSyntax { .. } => QueryKind::Explain, - Plan::Insert(_) => QueryKind::Insert, - Plan::Replace(_) - | Plan::DataMutation { .. } - | Plan::OptimizePurge(_) - | Plan::OptimizeCompactSegment(_) - | Plan::OptimizeCompactBlock { .. } => QueryKind::Update, - _ => QueryKind::Other, - } + // match self { + // Plan::Query { .. } => QueryKind::Query, + // Plan::CopyIntoTable(copy_plan) => match copy_plan.write_mode { + // CopyIntoTableMode::Insert { .. } => QueryKind::Insert, + // _ => QueryKind::CopyIntoTable, + // }, + // Plan::Explain { .. } + // | Plan::ExplainAnalyze { .. } + // | Plan::ExplainAst { .. } + // | Plan::ExplainSyntax { .. } => QueryKind::Explain, + // Plan::Insert(_) => QueryKind::Insert, + // Plan::Replace(_) + // | Plan::DataMutation { .. } + // | Plan::OptimizePurge(_) + // | Plan::OptimizeCompactSegment(_) + // | Plan::OptimizeCompactBlock { .. 
} => QueryKind::Update, + // _ => QueryKind::Other, + // } + todo!() } } @@ -476,7 +475,7 @@ impl Plan { Plan::DescNetworkPolicy(plan) => plan.schema(), Plan::ShowNetworkPolicies(plan) => plan.schema(), Plan::DescPasswordPolicy(plan) => plan.schema(), - Plan::CopyIntoTable(plan) => plan.schema(), + // Plan::CopyIntoTable(plan) => plan.schema(), Plan::CopyIntoLocation(plan) => plan.schema(), Plan::CreateTask(plan) => plan.schema(), Plan::DescribeTask(plan) => plan.schema(), From 8224cc6fc7562a10c314aa3510836cf02b32e7ad Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Tue, 26 Nov 2024 00:09:28 +0800 Subject: [PATCH 02/22] update --- .../interpreter_copy_into_table.rs | 430 +++++----------- .../src/interpreters/interpreter_factory.rs | 17 +- .../src/interpreters/interpreter_insert.rs | 261 ---------- .../src/interpreters/interpreter_replace.rs | 47 +- .../interpreters/interpreter_table_create.rs | 49 +- src/query/service/src/interpreters/mod.rs | 2 - .../builders/builder_copy_into_table.rs | 166 +++--- .../builders/builder_replace_into.rs | 59 +-- .../src/pipelines/builders/builder_scan.rs | 35 ++ .../service/src/pipelines/pipeline_builder.rs | 1 + .../transform_recursive_cte_source.rs | 1 + .../src/schedulers/fragments/fragmenter.rs | 21 +- .../src/schedulers/fragments/plan_fragment.rs | 23 +- src/query/sql/src/executor/format.rs | 3 + src/query/sql/src/executor/physical_plan.rs | 22 +- .../sql/src/executor/physical_plan_builder.rs | 4 + .../sql/src/executor/physical_plan_visitor.rs | 38 +- .../sql/src/executor/physical_plans/common.rs | 4 +- .../sql/src/executor/physical_plans/mod.rs | 2 + .../physical_copy_into_table.rs | 17 +- .../physical_plans/physical_value_scan.rs | 38 ++ src/query/sql/src/planner/binder/binder.rs | 3 +- .../sql/src/planner/binder/copy_into_table.rs | 476 ++++++++++-------- src/query/sql/src/planner/binder/insert.rs | 212 ++++---- src/query/sql/src/planner/binder/replace.rs | 2 - src/query/sql/src/planner/binder/util.rs | 2 + .../decorrelate/subquery_rewriter.rs | 2 + .../dynamic_sample/dynamic_sample.rs | 2 + src/query/sql/src/planner/optimizer/format.rs | 2 + .../src/planner/optimizer/hyper_dp/dphyp.rs | 4 +- .../sql/src/planner/optimizer/optimizer.rs | 74 +-- .../rule/rewrite/rule_semi_to_inner_join.rs | 4 +- src/query/sql/src/planner/optimizer/s_expr.rs | 4 + .../sql/src/planner/plans/copy_into_table.rs | 390 +++++++------- src/query/sql/src/planner/plans/insert.rs | 165 +++--- src/query/sql/src/planner/plans/mod.rs | 2 + src/query/sql/src/planner/plans/operator.rs | 68 +++ src/query/sql/src/planner/plans/plan.rs | 39 +- src/query/sql/src/planner/plans/value_scan.rs | 70 +++ 39 files changed, 1261 insertions(+), 1500 deletions(-) delete mode 100644 src/query/service/src/interpreters/interpreter_insert.rs create mode 100644 src/query/sql/src/executor/physical_plans/physical_value_scan.rs create mode 100644 src/query/sql/src/planner/plans/value_scan.rs diff --git a/src/query/service/src/interpreters/interpreter_copy_into_table.rs b/src/query/service/src/interpreters/interpreter_copy_into_table.rs index 1da643d5aa2f5..8cc0b67c88588 100644 --- a/src/query/service/src/interpreters/interpreter_copy_into_table.rs +++ b/src/query/service/src/interpreters/interpreter_copy_into_table.rs @@ -12,30 +12,19 @@ // See the License for the specific language governing permissions and // limitations under the License. 
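 
+// Both COPY INTO TABLE and INSERT now flow through this interpreter: the
+// bound `SExpr` (rooted at the `CopyIntoTable` operator, with the query,
+// stage scan or value scan as its only child) drives physical plan building,
+// while the optional `StageContext` carries the staged-file bookkeeping used
+// by the commit and purge steps.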
-use std::collections::BTreeMap; use std::sync::Arc; use databend_common_catalog::lock::LockTableOption; -use databend_common_catalog::plan::StageTableInfo; use databend_common_exception::Result; use databend_common_expression::types::Int32Type; use databend_common_expression::types::StringType; use databend_common_expression::DataBlock; use databend_common_expression::FromData; use databend_common_expression::SendableDataBlockStream; -use databend_common_meta_app::schema::UpdateStreamMetaReq; -use databend_common_pipeline_core::Pipeline; -use databend_common_sql::executor::physical_plans::CopyIntoTable; -use databend_common_sql::executor::physical_plans::CopyIntoTableSource; -use databend_common_sql::executor::physical_plans::Exchange; -use databend_common_sql::executor::physical_plans::FragmentKind; use databend_common_sql::executor::physical_plans::MutationKind; -use databend_common_sql::executor::physical_plans::TableScan; -use databend_common_sql::executor::table_read_plan::ToReadDataSourcePlan; -use databend_common_sql::executor::PhysicalPlan; -use databend_common_sql::plans::AppendSource; -use databend_common_storage::StageFileInfo; -use databend_common_storages_stage::StageTable; +use databend_common_sql::executor::PhysicalPlanBuilder; +use databend_common_sql::optimizer::SExpr; +use databend_common_sql::plans::StageContext; use log::debug; use log::info; @@ -43,145 +32,165 @@ use crate::interpreters::common::check_deduplicate_label; use crate::interpreters::common::dml_build_update_stream_req; use crate::interpreters::HookOperator; use crate::interpreters::Interpreter; -use crate::interpreters::SelectInterpreter; use crate::pipelines::PipelineBuildResult; use crate::pipelines::PipelineBuilder; use crate::schedulers::build_query_pipeline_without_render_result_set; use crate::sessions::QueryContext; use crate::sessions::TableContext; use crate::sql::plans::CopyIntoTablePlan; -use crate::sql::plans::Plan; +use crate::sql::MetadataRef; use crate::stream::DataBlockStream; pub struct CopyIntoTableInterpreter { ctx: Arc, - plan: CopyIntoTablePlan, + s_expr: SExpr, + metadata: MetadataRef, + stage_context: Option>, + overwrite: bool, } -impl CopyIntoTableInterpreter { - /// Create a CopyInterpreter with context and [`CopyIntoTablePlan`]. - pub fn try_create(ctx: Arc, plan: CopyIntoTablePlan) -> Result { - Ok(CopyIntoTableInterpreter { ctx, plan }) +#[async_trait::async_trait] +impl Interpreter for CopyIntoTableInterpreter { + fn name(&self) -> &str { + "CopyIntoTableInterpreterV2" } - #[async_backtrace::framed] - async fn build_query( - &self, - query: &Plan, - ) -> Result<(SelectInterpreter, Vec)> { - let (s_expr, metadata, bind_context, formatted_ast) = match query { - Plan::Query { - s_expr, - metadata, - bind_context, - formatted_ast, - .. 
- } => (s_expr, metadata, bind_context, formatted_ast), - v => unreachable!("Input plan must be Query, but it's {}", v), - }; - - let update_stream_meta = dml_build_update_stream_req(self.ctx.clone(), metadata).await?; - - let select_interpreter = SelectInterpreter::try_create( - self.ctx.clone(), - *(bind_context.clone()), - *s_expr.clone(), - metadata.clone(), - formatted_ast.clone(), - false, - )?; - - Ok((select_interpreter, update_stream_meta)) + fn is_ddl(&self) -> bool { + false } + #[fastrace::trace] #[async_backtrace::framed] - pub async fn build_physical_plan( - &self, - plan: &CopyIntoTablePlan, - ) -> Result<(PhysicalPlan, Vec)> { - // let to_table = self - // .ctx - // .get_table(&plan.catalog_name, &plan.database_name, &plan.table_name) - // .await?; - // let mut update_stream_meta_reqs = vec![]; - // let (source, project_columns) = if let Some(ref query) = plan.query { - // let query = if plan.enable_distributed { - // query.remove_exchange_for_select() - // } else { - // *query.clone() - // }; - - // let (query_interpreter, update_stream_meta) = self.build_query(&query).await?; - // update_stream_meta_reqs = update_stream_meta; - // let query_physical_plan = Box::new(query_interpreter.build_physical_plan().await?); + async fn execute2(&self) -> Result { + debug!("ctx.id" = self.ctx.get_id().as_str(); "copy_into_table_interpreter_execute_v2"); + if check_deduplicate_label(self.ctx.clone()).await? { + return Ok(PipelineBuildResult::create()); + } - // let result_columns = query_interpreter.get_result_columns(); - // ( - // CopyIntoTableSource::Query(query_physical_plan), - // Some(result_columns), - // ) - // } else { - // let stage_table = StageTable::try_create(plan.stage_table_info.clone())?; + // build source and append pipeline + let mut build_res = { + let mut physical_plan_builder = + PhysicalPlanBuilder::new(self.metadata.clone(), self.ctx.clone(), false); + let physical_plan = physical_plan_builder + .build(&self.s_expr, Default::default()) + .await?; + build_query_pipeline_without_render_result_set(&self.ctx, &physical_plan).await? + }; - // let data_source_plan = stage_table - // .read_plan(self.ctx.clone(), None, None, false, false) - // .await?; + // build commit pipeline + let copy_into_table: CopyIntoTablePlan = self.s_expr.plan().clone().try_into()?; + let target_table = self + .ctx + .get_table( + ©_into_table.catalog_name, + ©_into_table.database_name, + ©_into_table.table_name, + ) + .await?; + let copied_files_meta_req = match &self.stage_context { + Some(stage_context) => PipelineBuilder::build_upsert_copied_files_to_meta_req( + self.ctx.clone(), + target_table.as_ref(), + stage_context.purge, + &stage_context.files_to_copy, + stage_context.force, + )?, + None => None, + }; + let update_stream_meta = + dml_build_update_stream_req(self.ctx.clone(), &self.metadata).await?; + target_table.commit_insertion( + self.ctx.clone(), + &mut build_res.main_pipeline, + copied_files_meta_req, + update_stream_meta, + self.overwrite, + None, + unsafe { self.ctx.get_settings().get_deduplicate_label()? }, + )?; - // let mut name_mapping = BTreeMap::new(); - // for (idx, field) in data_source_plan.schema().fields.iter().enumerate() { - // name_mapping.insert(field.name.clone(), idx); - // } + // Purge files on pipeline finished. 
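+        // The staged files copied in this run, plus any duplicates detected
+        // earlier, are queued for deletion in an on-finished callback, so
+        // nothing is removed until the pipeline (including the commit above)
+        // has completed, and only when the stage's purge option is set.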
+        if let Some(stage_context) = &self.stage_context {
+            let StageContext {
+                purge,
+                force: _,
+                files_to_copy,
+                duplicated_files_detected,
+                stage_info,
+            } = stage_context.as_ref();
+            info!(
+                "set files to be purged, # of copied files: {}, # of duplicated files: {}",
+                files_to_copy.len(),
+                duplicated_files_detected.len()
+            );
+
+            let files_to_be_deleted = files_to_copy
+                .iter()
+                .map(|v| v.path.clone())
+                .chain(duplicated_files_detected.clone())
+                .collect::<Vec<_>>();
+            PipelineBuilder::set_purge_files_on_finished(
+                self.ctx.clone(),
+                files_to_be_deleted,
+                *purge,
+                stage_info.clone(),
+                &mut build_res.main_pipeline,
+            )?;
+        }
+
+        // Execute hook.
+        {
+            let copy_into_table: CopyIntoTablePlan = self.s_expr.plan().clone().try_into()?;
+            let hook_operator = HookOperator::create(
+                self.ctx.clone(),
+                copy_into_table.catalog_name.to_string(),
+                copy_into_table.database_name.to_string(),
+                copy_into_table.table_name.to_string(),
+                MutationKind::Insert,
+                LockTableOption::LockNoRetry,
+            );
+            hook_operator.execute(&mut build_res.main_pipeline).await;
+        }
+
+        Ok(build_res)
+    }
+
+    fn inject_result(&self) -> Result<SendableDataBlockStream> {
+        let copy_into_table: CopyIntoTablePlan = self.s_expr.plan().clone().try_into()?;
+        match &copy_into_table.mutation_kind {
+            MutationKind::CopyInto => {
+                let stage_context = self.stage_context.as_ref().unwrap();
+                let blocks = self.get_copy_into_table_result(
+                    stage_context.stage_info.copy_options.return_failed_only,
+                )?;
+                Ok(Box::pin(DataBlockStream::create(None, blocks)))
+            }
+            MutationKind::Insert => Ok(Box::pin(DataBlockStream::create(None, vec![]))),
+            _ => unreachable!(),
+        }
+    }
+}
+
+impl CopyIntoTableInterpreter {
+    /// Create a CopyIntoTableInterpreter for a bound COPY INTO / INSERT plan.
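+    ///
+    /// `s_expr` is expected to be rooted at the `CopyIntoTable` operator (the
+    /// `try_into` conversions above rely on this); `stage_context` is `Some`
+    /// only when the source is a stage, and carries the copied-file
+    /// bookkeeping used by the commit and purge steps.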
+ pub fn try_create( + ctx: Arc, + s_expr: SExpr, + metadata: MetadataRef, + stage_context: Option>, + overwrite: bool, + ) -> Result { + Ok(CopyIntoTableInterpreter { + ctx, + s_expr, + metadata, + stage_context, + overwrite, + }) } - fn get_copy_into_table_result( - &self, - stage_table_info: &StageTableInfo, - ) -> Result> { - let return_all = !stage_table_info.stage_info.copy_options.return_failed_only; + fn get_copy_into_table_result(&self, return_failed_only: bool) -> Result> { + let return_all = !return_failed_only; let cs = self.ctx.get_copy_status(); let mut results = cs.files.iter().collect::>(); @@ -219,173 +228,4 @@ impl CopyIntoTableInterpreter { ])]; Ok(blocks) } - - /// Build commit insertion pipeline. - async fn commit_insertion( - &self, - main_pipeline: &mut Pipeline, - plan: &CopyIntoTablePlan, - files_to_copy: Vec, - duplicated_files_detected: Vec, - update_stream_meta: Vec, - deduplicated_label: Option, - ) -> Result<()> { - let ctx = self.ctx.clone(); - let to_table = ctx - .get_table(&plan.catalog_name, &plan.database_name, &plan.table_name) - .await?; - - // Commit. - // { - // let copied_files_meta_req = PipelineBuilder::build_upsert_copied_files_to_meta_req( - // ctx.clone(), - // to_table.as_ref(), - // &plan.stage_table_info.stage_info, - // &files_to_copy, - // plan.force, - // )?; - - // to_table.commit_insertion( - // ctx.clone(), - // main_pipeline, - // copied_files_meta_req, - // update_stream_meta, - // plan.write_mode.is_overwrite(), - // None, - // deduplicated_label, - // )?; - // } - - // Purge files. - // { - // info!( - // "set files to be purged, # of copied files: {}, # of duplicated files: {}", - // files_to_copy.len(), - // duplicated_files_detected.len() - // ); - - // let files_to_be_deleted = files_to_copy - // .into_iter() - // .map(|v| v.path) - // .chain(duplicated_files_detected) - // .collect::>(); - // // set on_finished callback. - // PipelineBuilder::set_purge_files_on_finished( - // ctx.clone(), - // files_to_be_deleted, - // plan.stage_table_info.stage_info.copy_options.purge, - // plan.stage_table_info.stage_info.clone(), - // main_pipeline, - // )?; - // } - Ok(()) - } - - async fn on_no_files_to_copy(&self) -> Result { - // currently, there is only one thing that we care about: - // - // if `purge_duplicated_files_in_copy` and `purge` are all enabled, - // and there are duplicated files detected, we should clean them up immediately. - - // it might be better to reuse the PipelineBuilder::set_purge_files_on_finished, - // unfortunately, hooking the on_finished callback of a "blank" pipeline, - // e.g. `PipelineBuildResult::create` leads to runtime error (during pipeline execution). - - // if self.plan.stage_table_info.stage_info.copy_options.purge - // && !self - // .plan - // .stage_table_info - // .duplicated_files_detected - // .is_empty() - // && self - // .ctx - // .get_settings() - // .get_enable_purge_duplicated_files_in_copy()? 
- // { - // info!( - // "purge_duplicated_files_in_copy enabled, number of duplicated files: {}", - // self.plan.stage_table_info.duplicated_files_detected.len() - // ); - - // PipelineBuilder::purge_files_immediately( - // self.ctx.clone(), - // self.plan.stage_table_info.duplicated_files_detected.clone(), - // self.plan.stage_table_info.stage_info.clone(), - // ) - // .await?; - // } - Ok(PipelineBuildResult::create()) - } -} - -#[async_trait::async_trait] -impl Interpreter for CopyIntoTableInterpreter { - fn name(&self) -> &str { - "CopyIntoTableInterpreterV2" - } - - fn is_ddl(&self) -> bool { - false - } - - #[fastrace::trace] - #[async_backtrace::framed] - async fn execute2(&self) -> Result { - debug!("ctx.id" = self.ctx.get_id().as_str(); "copy_into_table_interpreter_execute_v2"); - - if check_deduplicate_label(self.ctx.clone()).await? { - return Ok(PipelineBuildResult::create()); - } - - // if self.plan.no_file_to_copy { - // info!("no file to copy"); - // return self.on_no_files_to_copy().await; - // } - - let (physical_plan, update_stream_meta) = self.build_physical_plan(&self.plan).await?; - let mut build_res = - build_query_pipeline_without_render_result_set(&self.ctx, &physical_plan).await?; - - // Build commit insertion pipeline. - // { - // let files_to_copy = self.plan.source.files_to_copy(); - - // let duplicated_files_detected = self.plan.source.duplicated_files_detected(); - - // self.commit_insertion( - // &mut build_res.main_pipeline, - // &self.plan, - // files_to_copy, - // duplicated_files_detected, - // update_stream_meta, - // unsafe { self.ctx.get_settings().get_deduplicate_label()? }, - // ) - // .await?; - // } - - // Execute hook. - { - let hook_operator = HookOperator::create( - self.ctx.clone(), - self.plan.catalog_name.to_string(), - self.plan.database_name.to_string(), - self.plan.table_name.to_string(), - MutationKind::Insert, - LockTableOption::LockNoRetry, - ); - hook_operator.execute(&mut build_res.main_pipeline).await; - } - - Ok(build_res) - } - - fn inject_result(&self) -> Result { - // let blocks = match &self.plan.source { - // AppendSource::Stage(stage) => self.get_copy_into_table_result(stage)?, - // _ => vec![DataBlock::empty_with_schema(self.plan.schema())], - // }; - todo!() - - // Ok(Box::pin(DataBlockStream::create(None, blocks))) - } } diff --git a/src/query/service/src/interpreters/interpreter_factory.rs b/src/query/service/src/interpreters/interpreter_factory.rs index 0a90eb381e709..ff660ce5eea96 100644 --- a/src/query/service/src/interpreters/interpreter_factory.rs +++ b/src/query/service/src/interpreters/interpreter_factory.rs @@ -156,11 +156,18 @@ impl InterpreterFactory { *graphical, )?)), - Plan::CopyIntoTable { .. 
} => todo!(), - // Ok(Arc::new(CopyIntoTableInterpreter::try_create( - // ctx, - // *copy_plan.clone(), - // )?)), + Plan::CopyIntoTable { + s_expr, + metadata, + stage_context, + overwrite, + } => Ok(Arc::new(CopyIntoTableInterpreter::try_create( + ctx, + *s_expr.clone(), + metadata.clone(), + stage_context.clone(), + *overwrite, + )?)), Plan::CopyIntoLocation(copy_plan) => Ok(Arc::new( CopyIntoLocationInterpreter::try_create(ctx, copy_plan.clone())?, )), diff --git a/src/query/service/src/interpreters/interpreter_insert.rs b/src/query/service/src/interpreters/interpreter_insert.rs deleted file mode 100644 index c98267460bab9..0000000000000 --- a/src/query/service/src/interpreters/interpreter_insert.rs +++ /dev/null @@ -1,261 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::sync::Arc; - -use databend_common_catalog::lock::LockTableOption; -use databend_common_catalog::table::TableExt; -use databend_common_exception::ErrorCode; -use databend_common_exception::Result; -use databend_common_expression::DataSchema; -use databend_common_pipeline_sources::AsyncSourcer; -use databend_common_sql::executor::physical_plans::DistributedInsertSelect; -use databend_common_sql::executor::physical_plans::MutationKind; -use databend_common_sql::executor::PhysicalPlan; -use databend_common_sql::executor::PhysicalPlanBuilder; -use databend_common_sql::plans::Insert; -use databend_common_sql::plans::InsertInputSource; -use databend_common_sql::plans::InsertValue; -use databend_common_sql::plans::Plan; -use databend_common_sql::NameResolutionContext; -use log::info; - -use crate::interpreters::common::check_deduplicate_label; -use crate::interpreters::common::dml_build_update_stream_req; -use crate::interpreters::HookOperator; -use crate::interpreters::Interpreter; -use crate::interpreters::InterpreterPtr; -use crate::pipelines::PipelineBuildResult; -use crate::pipelines::PipelineBuilder; -use crate::pipelines::RawValueSource; -use crate::pipelines::ValueSource; -use crate::schedulers::build_query_pipeline_without_render_result_set; -use crate::sessions::QueryContext; -use crate::sessions::TableContext; - -pub struct InsertInterpreter { - ctx: Arc, - plan: Insert, -} - -impl InsertInterpreter { - pub fn try_create(ctx: Arc, plan: Insert) -> Result { - Ok(Arc::new(InsertInterpreter { ctx, plan })) - } - - fn check_schema_cast(&self, plan: &Plan) -> Result { - let output_schema = &self.plan.schema; - let select_schema = plan.schema(); - - // validate schema - if select_schema.fields().len() != output_schema.fields().len() { - return Err(ErrorCode::BadArguments(format!( - "Fields in select statement is not equal with expected, select fields: {}, insert fields: {}", - select_schema.fields().len(), - output_schema.fields().len(), - ))); - } - - // check if cast needed - let cast_needed = select_schema.as_ref() != &DataSchema::from(output_schema.as_ref()); - Ok(cast_needed) - } -} - -#[async_trait::async_trait] -impl Interpreter for InsertInterpreter { - 
fn name(&self) -> &str { - "InsertIntoInterpreter" - } - - fn is_ddl(&self) -> bool { - false - } - - #[async_backtrace::framed] - async fn execute2(&self) -> Result { - if check_deduplicate_label(self.ctx.clone()).await? { - return Ok(PipelineBuildResult::create()); - } - let table = if let Some(table_info) = &self.plan.table_info { - // if table_info is provided, we should instantiated table with it. - self.ctx - .get_catalog(&self.plan.catalog) - .await? - .get_table_by_info(table_info)? - } else { - self.ctx - .get_table(&self.plan.catalog, &self.plan.database, &self.plan.table) - .await? - }; - - // check mutability - table.check_mutable()?; - - let mut build_res = PipelineBuildResult::create(); - - match &self.plan.source { - InsertInputSource::Stage(_) => { - unreachable!() - } - InsertInputSource::Values(InsertValue::Values { rows }) => { - build_res.main_pipeline.add_source( - |output| { - let inner = ValueSource::new(rows.clone(), self.plan.dest_schema()); - AsyncSourcer::create(self.ctx.clone(), output, inner) - }, - 1, - )?; - } - InsertInputSource::Values(InsertValue::RawValues { data, start }) => { - build_res.main_pipeline.add_source( - |output| { - let name_resolution_ctx = NameResolutionContext { - deny_column_reference: true, - ..Default::default() - }; - let inner = RawValueSource::new( - data.to_string(), - self.ctx.clone(), - name_resolution_ctx, - self.plan.dest_schema(), - *start, - ); - AsyncSourcer::create(self.ctx.clone(), output, inner) - }, - 1, - )?; - } - InsertInputSource::SelectPlan(plan) => { - let table1 = table.clone(); - let (mut select_plan, select_column_bindings, metadata) = match plan.as_ref() { - Plan::Query { - s_expr, - metadata, - bind_context, - .. - } => { - let mut builder1 = - PhysicalPlanBuilder::new(metadata.clone(), self.ctx.clone(), false); - ( - builder1.build(s_expr, bind_context.column_set()).await?, - bind_context.columns.clone(), - metadata, - ) - } - _ => unreachable!(), - }; - - let explain_plan = select_plan - .format(metadata.clone(), Default::default())? - .format_pretty()?; - info!("Insert select plan: \n{}", explain_plan); - - let update_stream_meta = - dml_build_update_stream_req(self.ctx.clone(), metadata).await?; - - // here we remove the last exchange merge plan to trigger distribute insert - let insert_select_plan = match select_plan { - PhysicalPlan::Exchange(ref mut exchange) => { - // insert can be dispatched to different nodes - let input = exchange.input.clone(); - exchange.input = Box::new(PhysicalPlan::DistributedInsertSelect(Box::new( - DistributedInsertSelect { - // TODO(leiysky): we reuse the id of exchange here, - // which is not correct. We should generate a new id for insert. - plan_id: exchange.plan_id, - input, - table_info: table1.get_table_info().clone(), - select_schema: plan.schema(), - select_column_bindings, - insert_schema: self.plan.dest_schema(), - cast_needed: self.check_schema_cast(plan)?, - }, - ))); - select_plan - } - other_plan => { - // insert should wait until all nodes finished - PhysicalPlan::DistributedInsertSelect(Box::new(DistributedInsertSelect { - // TODO: we reuse the id of other plan here, - // which is not correct. We should generate a new id for insert. 
- plan_id: other_plan.get_id(), - input: Box::new(other_plan), - table_info: table1.get_table_info().clone(), - select_schema: plan.schema(), - select_column_bindings, - insert_schema: self.plan.dest_schema(), - cast_needed: self.check_schema_cast(plan)?, - })) - } - }; - - let mut build_res = - build_query_pipeline_without_render_result_set(&self.ctx, &insert_select_plan) - .await?; - - table.commit_insertion( - self.ctx.clone(), - &mut build_res.main_pipeline, - None, - update_stream_meta, - self.plan.overwrite, - None, - unsafe { self.ctx.get_settings().get_deduplicate_label()? }, - )?; - - // Execute the hook operator. - { - let hook_operator = HookOperator::create( - self.ctx.clone(), - self.plan.catalog.clone(), - self.plan.database.clone(), - self.plan.table.clone(), - MutationKind::Insert, - LockTableOption::LockNoRetry, - ); - hook_operator.execute(&mut build_res.main_pipeline).await; - } - - return Ok(build_res); - } - }; - - PipelineBuilder::build_append2table_with_commit_pipeline( - self.ctx.clone(), - &mut build_res.main_pipeline, - table.clone(), - self.plan.dest_schema(), - None, - vec![], - self.plan.overwrite, - unsafe { self.ctx.get_settings().get_deduplicate_label()? }, - )?; - - // Execute the hook operator. - { - let hook_operator = HookOperator::create( - self.ctx.clone(), - self.plan.catalog.clone(), - self.plan.database.clone(), - self.plan.table.clone(), - MutationKind::Insert, - LockTableOption::LockNoRetry, - ); - hook_operator.execute(&mut build_res.main_pipeline).await; - } - - Ok(build_res) - } -} diff --git a/src/query/service/src/interpreters/interpreter_replace.rs b/src/query/service/src/interpreters/interpreter_replace.rs index a182e3f5b1230..ec3b950c3de34 100644 --- a/src/query/service/src/interpreters/interpreter_replace.rs +++ b/src/query/service/src/interpreters/interpreter_replace.rs @@ -50,7 +50,6 @@ use parking_lot::RwLock; use crate::interpreters::common::check_deduplicate_label; use crate::interpreters::common::dml_build_update_stream_req; -use crate::interpreters::interpreter_copy_into_table::CopyIntoTableInterpreter; use crate::interpreters::HookOperator; use crate::interpreters::Interpreter; use crate::interpreters::InterpreterPtr; @@ -375,7 +374,7 @@ impl ReplaceInterpreter { ctx: Arc, source: &'a InsertInputSource, schema: DataSchemaRef, - purge_info: &mut Option<(Vec, StageInfo)>, + _purge_info: &mut Option<(Vec, StageInfo)>, ) -> Result { match source { InsertInputSource::Values(source) => self @@ -390,27 +389,29 @@ impl ReplaceInterpreter { InsertInputSource::SelectPlan(plan) => { self.connect_query_plan_source(ctx.clone(), plan).await } - InsertInputSource::Stage(plan) => match *plan.clone() { - // Plan::CopyIntoTable(copy_plan) => { - // let interpreter = - // CopyIntoTableInterpreter::try_create(ctx.clone(), *copy_plan.clone())?; - // let (physical_plan, _) = interpreter.build_physical_plan(©_plan).await?; - - // // TODO optimization: if copy_plan.stage_table_info.files_to_copy is None, there should be a short-cut plan - - // *purge_info = Some(( - // copy_plan.stage_table_info.files_to_copy.unwrap_or_default(), - // copy_plan.stage_table_info.stage_info.clone(), - // )); - // Ok(ReplaceSourceCtx { - // root: Box::new(physical_plan), - // select_ctx: None, - // update_stream_meta: vec![], - // bind_context: None, - // }) - // } - _ => unreachable!("plan in InsertInputSource::Stag must be CopyIntoTable"), - }, + _ => todo!(), + // InsertInputSource::Stage(plan) => match *plan.clone() { + // Plan::CopyIntoTable(_copy_plan) => { + // // 
let interpreter = + // // CopyIntoTableInterpreter::try_create(ctx.clone(), *copy_plan.clone())?; + // // let (physical_plan, _) = interpreter.build_physical_plan(©_plan).await?; + + // // // TODO optimization: if copy_plan.stage_table_info.files_to_copy is None, there should be a short-cut plan + + // // *purge_info = Some(( + // // copy_plan.stage_table_info.files_to_copy.unwrap_or_default(), + // // copy_plan.stage_table_info.stage_info.clone(), + // // )); + // // Ok(ReplaceSourceCtx { + // // root: Box::new(physical_plan), + // // select_ctx: None, + // // update_stream_meta: vec![], + // // bind_context: None, + // // }) + // todo!() + // } + // _ => unreachable!("plan in InsertInputSource::Stag must be CopyIntoTable"), + // }, } } diff --git a/src/query/service/src/interpreters/interpreter_table_create.rs b/src/query/service/src/interpreters/interpreter_table_create.rs index 60679aa4a7b8b..1fba75308e644 100644 --- a/src/query/service/src/interpreters/interpreter_table_create.rs +++ b/src/query/service/src/interpreters/interpreter_table_create.rs @@ -38,7 +38,10 @@ use databend_common_meta_app::schema::TableNameIdent; use databend_common_meta_app::schema::TableStatistics; use databend_common_meta_types::MatchSeq; use databend_common_pipeline_core::ExecutionInfo; +use databend_common_sql::executor::physical_plans::MutationKind; use databend_common_sql::field_default_value; +use databend_common_sql::optimizer::SExpr; +use databend_common_sql::plans::CopyIntoTablePlan; use databend_common_sql::plans::CreateTablePlan; use databend_common_storages_fuse::io::MetaReaders; use databend_common_storages_fuse::FuseStorageFormat; @@ -63,13 +66,11 @@ use crate::interpreters::common::table_option_validation::is_valid_create_opt; use crate::interpreters::common::table_option_validation::is_valid_data_retention_period; use crate::interpreters::common::table_option_validation::is_valid_random_seed; use crate::interpreters::common::table_option_validation::is_valid_row_per_block; -use crate::interpreters::InsertInterpreter; +use crate::interpreters::interpreter_copy_into_table::CopyIntoTableInterpreter; use crate::interpreters::Interpreter; use crate::pipelines::PipelineBuildResult; use crate::sessions::QueryContext; use crate::sessions::TableContext; -use crate::sql::plans::Insert; -use crate::sql::plans::InsertInputSource; use crate::sql::plans::Plan; use crate::storages::StorageDescription; @@ -218,26 +219,44 @@ impl CreateTableInterpreter { // For the situation above, we implicitly cast the data type when inserting data. // The casting and schema checking is in interpreter_insert.rs, function check_schema_cast. - let table_info = TableInfo::new( + let _table_info = TableInfo::new( &self.plan.database, &self.plan.table, TableIdent::new(table_id, table_id_seq), table_meta, ); - let insert_plan = Insert { - catalog: self.plan.catalog.clone(), - database: self.plan.database.clone(), - table: self.plan.table.clone(), - schema: self.plan.schema.clone(), - overwrite: false, - source: InsertInputSource::SelectPlan(select_plan), - table_info: Some(table_info), + let (project_columns, source, metadata) = match select_plan.as_ref() { + Plan::Query { + bind_context, + s_expr, + metadata, + .. + } => ( + Some(bind_context.columns.clone()), + *s_expr.clone(), + metadata.clone(), + ), + _ => unreachable!(), }; - let mut pipeline = InsertInterpreter::try_create(self.ctx.clone(), insert_plan)? 
- .execute2() - .await?; + let insert_plan = CopyIntoTablePlan { + catalog_name: self.plan.catalog.clone(), + database_name: self.plan.database.clone(), + table_name: self.plan.table.clone(), + required_values_schema: Arc::new(self.plan.schema.clone().into()), + values_consts: vec![], + required_source_schema: Arc::new(self.plan.schema.clone().into()), + project_columns, + mutation_kind: MutationKind::Insert, + }; + + let s_expr = SExpr::create_unary(Arc::new(insert_plan.into()), Arc::new(source)); + + let mut pipeline = + CopyIntoTableInterpreter::try_create(self.ctx.clone(), s_expr, metadata, None, false)? + .execute2() + .await?; let db_name = self.plan.database.clone(); let table_name = self.plan.table.clone(); diff --git a/src/query/service/src/interpreters/mod.rs b/src/query/service/src/interpreters/mod.rs index 2142577b4a9ed..9db1a05e1487d 100644 --- a/src/query/service/src/interpreters/mod.rs +++ b/src/query/service/src/interpreters/mod.rs @@ -48,7 +48,6 @@ mod interpreter_file_format_show; mod interpreter_index_create; mod interpreter_index_drop; mod interpreter_index_refresh; -mod interpreter_insert; mod interpreter_insert_multi_table; mod interpreter_kill; mod interpreter_metrics; @@ -167,7 +166,6 @@ pub use interpreter_execute_immediate::ExecuteImmediateInterpreter; pub use interpreter_explain::ExplainInterpreter; pub use interpreter_factory::InterpreterFactory; pub use interpreter_index_refresh::RefreshIndexInterpreter; -pub use interpreter_insert::InsertInterpreter; pub use interpreter_insert_multi_table::InsertMultiTableInterpreter; pub use interpreter_kill::KillInterpreter; pub use interpreter_metrics::InterpreterMetrics; diff --git a/src/query/service/src/pipelines/builders/builder_copy_into_table.rs b/src/query/service/src/pipelines/builders/builder_copy_into_table.rs index 361cb573b406d..86ce7e4f2e8d9 100644 --- a/src/query/service/src/pipelines/builders/builder_copy_into_table.rs +++ b/src/query/service/src/pipelines/builders/builder_copy_into_table.rs @@ -24,45 +24,32 @@ use databend_common_expression::DataSchema; use databend_common_expression::DataSchemaRef; use databend_common_expression::DataSchemaRefExt; use databend_common_expression::Scalar; -use databend_common_meta_app::principal::FileFormatParams; -use databend_common_meta_app::principal::ParquetFileFormatParams; -use databend_common_meta_app::principal::StageInfo; +// use databend_common_meta_app::principal::FileFormatParams; +// use databend_common_meta_app::principal::ParquetFileFormatParams; +// use databend_common_meta_app::principal::StageInfo; use databend_common_meta_app::schema::TableCopiedFileInfo; use databend_common_meta_app::schema::UpsertTableCopiedFileReq; use databend_common_pipeline_core::Pipeline; use databend_common_pipeline_transforms::processors::TransformPipelineHelper; use databend_common_sql::executor::physical_plans::CopyIntoTable; -use databend_common_sql::executor::physical_plans::CopyIntoTableSource; -use databend_common_sql::plans::CopyIntoTableMode; use databend_common_storage::StageFileInfo; use log::debug; use log::info; use crate::pipelines::processors::transforms::TransformAddConstColumns; use crate::pipelines::processors::TransformCastSchema; -use crate::pipelines::processors::TransformNullIf; +// use crate::pipelines::processors::TransformNullIf; use crate::pipelines::PipelineBuilder; use crate::sessions::QueryContext; -/// This file implements copy into table pipeline builder. 
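+// Builds the COPY INTO TABLE pipeline: the source plan (query, stage scan or
+// value scan) is built first, its output is projected and cast to the target
+// table's schema, and rows are appended without committing; the interpreter
+// layers the commit step on top.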
impl PipelineBuilder { pub(crate) fn build_copy_into_table(&mut self, copy: &CopyIntoTable) -> Result<()> { let to_table = self.ctx.build_table_by_table_info(©.table_info, None)?; - let source_schema = match ©.source { - CopyIntoTableSource::Query(input) => { - self.build_pipeline(input)?; - // Reorder the result for select clause - PipelineBuilder::build_result_projection( - &self.func_ctx, - input.output_schema()?, - copy.project_columns.as_ref().unwrap(), - &mut self.main_pipeline, - false, - )?; - let fields = copy - .project_columns - .as_ref() - .unwrap() + self.ctx + .set_read_block_thresholds(to_table.get_block_thresholds()); + let source_schema = match ©.project_columns { + Some(project_columns) => { + let fields = project_columns .iter() .map(|column_binding| { DataField::new( @@ -73,14 +60,19 @@ impl PipelineBuilder { .collect(); DataSchemaRefExt::create(fields) } - CopyIntoTableSource::Stage(input) => { - self.ctx - .set_read_block_thresholds(to_table.get_block_thresholds()); - - self.build_pipeline(input)?; - copy.required_source_schema.clone() - } + None => copy.input.output_schema()?, }; + + self.build_pipeline(©.input)?; + if let Some(project_columns) = ©.project_columns { + PipelineBuilder::build_result_projection( + &self.func_ctx, + copy.input.output_schema()?, + project_columns, + &mut self.main_pipeline, + false, + )?; + } Self::build_append_data_pipeline( self.ctx.clone(), &mut self.main_pipeline, @@ -91,34 +83,32 @@ impl PipelineBuilder { Ok(()) } - fn need_null_if_processor<'a>( - plan: &'a CopyIntoTable, - source_schema: &Arc, - dest_schema: &Arc, - ) -> Option<&'a [String]> { - if plan.is_transform { - return None; - } - if let FileFormatParams::Parquet(ParquetFileFormatParams { null_if, .. }) = - &plan.stage_table_info.stage_info.file_format_params - { - if !null_if.is_empty() - && source_schema - .fields - .iter() - .zip(dest_schema.fields.iter()) - .any(|(src_field, dest_field)| { - TransformNullIf::column_need_transform( - src_field.data_type(), - dest_field.data_type(), - ) - }) - { - return Some(null_if); - } - } - None - } + // fn need_null_if_processor<'a>( + // plan: &'a CopyIntoTable, + // _source_schema: &Arc, + // _dest_schema: &Arc, + // ) -> Option<&'a [String]> { + // // if let FileFormatParams::Parquet(ParquetFileFormatParams { null_if, .. 
}) = + // // &plan.stage_table_info.stage_info.file_format_params + // // { + // // if !null_if.is_empty() + // // && source_schema + // // .fields + // // .iter() + // // .zip(dest_schema.fields.iter()) + // // .any(|(src_field, dest_field)| { + // // TransformNullIf::column_need_transform( + // // src_field.data_type(), + // // dest_field.data_type(), + // // ) + // // }) + // // { + // // return Some(null_if); + // // } + // // } + // // None + // todo!() + // } fn build_append_data_pipeline( ctx: Arc, @@ -130,24 +120,23 @@ impl PipelineBuilder { let plan_required_source_schema = &plan.required_source_schema; let plan_values_consts = &plan.values_consts; let plan_required_values_schema = &plan.required_values_schema; - let plan_write_mode = &plan.write_mode; - let source_schema = if let Some(null_if) = - Self::need_null_if_processor(plan, &source_schema, plan_required_source_schema) - { - let func_ctx = ctx.get_function_context()?; - main_pipeline.try_add_transformer(|| { - TransformNullIf::try_new( - source_schema.clone(), - plan_required_source_schema.clone(), - func_ctx.clone(), - null_if, - ) - })?; - TransformNullIf::new_schema(&source_schema) - } else { - source_schema - }; + // let source_schema = if let Some(null_if) = + // Self::need_null_if_processor(plan, &source_schema, plan_required_source_schema) + // { + // let func_ctx = ctx.get_function_context()?; + // main_pipeline.try_add_transformer(|| { + // TransformNullIf::try_new( + // source_schema.clone(), + // plan_required_source_schema.clone(), + // func_ctx.clone(), + // null_if, + // ) + // })?; + // TransformNullIf::new_schema(&source_schema) + // } else { + // source_schema + // }; if &source_schema != plan_required_source_schema { // only parquet need cast @@ -171,31 +160,18 @@ impl PipelineBuilder { )?; } - // append data without commit. - match plan_write_mode { - CopyIntoTableMode::Insert { overwrite: _ } => { - Self::build_append2table_without_commit_pipeline( - ctx, - main_pipeline, - to_table.clone(), - plan_required_values_schema.clone(), - )? 
- } - CopyIntoTableMode::Replace => {} - CopyIntoTableMode::Copy => Self::build_append2table_without_commit_pipeline( - ctx, - main_pipeline, - to_table.clone(), - plan_required_values_schema.clone(), - )?, - } - Ok(()) + Self::build_append2table_without_commit_pipeline( + ctx, + main_pipeline, + to_table.clone(), + plan_required_values_schema.clone(), + ) } pub(crate) fn build_upsert_copied_files_to_meta_req( ctx: Arc, to_table: &dyn Table, - stage_info: &StageInfo, + purge: bool, copied_files: &[StageFileInfo], force: bool, ) -> Result> { @@ -216,7 +192,7 @@ impl PipelineBuilder { let expire_hours = ctx.get_settings().get_load_file_metadata_expire_hours()?; let upsert_copied_files_request = { - if stage_info.copy_options.purge && force { + if purge && force { // if `purge-after-copy` is enabled, and in `force` copy mode, // we do not need to upsert copied files into meta server info!( diff --git a/src/query/service/src/pipelines/builders/builder_replace_into.rs b/src/query/service/src/pipelines/builders/builder_replace_into.rs index ad3fd8854904c..9564103a21d0b 100644 --- a/src/query/service/src/pipelines/builders/builder_replace_into.rs +++ b/src/query/service/src/pipelines/builders/builder_replace_into.rs @@ -34,7 +34,7 @@ use databend_common_pipeline_core::processors::InputPort; use databend_common_pipeline_core::processors::OutputPort; use databend_common_pipeline_core::Pipe; use databend_common_pipeline_sources::AsyncSource; -use databend_common_pipeline_sources::AsyncSourcer; +// use databend_common_pipeline_sources::AsyncSourcer; use databend_common_pipeline_transforms::processors::build_compact_block_pipeline; use databend_common_pipeline_transforms::processors::create_dummy_item; use databend_common_pipeline_transforms::processors::TransformPipelineHelper; @@ -43,7 +43,7 @@ use databend_common_sql::executor::physical_plans::ReplaceAsyncSourcer; use databend_common_sql::executor::physical_plans::ReplaceDeduplicate; use databend_common_sql::executor::physical_plans::ReplaceInto; use databend_common_sql::executor::physical_plans::ReplaceSelectCtx; -use databend_common_sql::plans::InsertValue; +// use databend_common_sql::plans::InsertValue; use databend_common_sql::BindContext; use databend_common_sql::Metadata; use databend_common_sql::MetadataRef; @@ -72,30 +72,30 @@ impl PipelineBuilder { // build async sourcer pipeline. 
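     // Values now arrive through the `ValueScan` physical plan, so this is
     // temporarily a no-op; `build_value_scan` in builder_scan.rs is the path
     // that wires up `ValueSource` / `RawValueSource`.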
pub(crate) fn build_async_sourcer( &mut self, - async_sourcer: &ReplaceAsyncSourcer, + _async_sourcer: &ReplaceAsyncSourcer, ) -> Result<()> { - self.main_pipeline.add_source( - |output| { - let name_resolution_ctx = NameResolutionContext::try_from(self.settings.as_ref())?; - match &async_sourcer.source { - InsertValue::Values { rows } => { - let inner = ValueSource::new(rows.clone(), async_sourcer.schema.clone()); - AsyncSourcer::create(self.ctx.clone(), output, inner) - } - InsertValue::RawValues { data, start } => { - let inner = RawValueSource::new( - data.clone(), - self.ctx.clone(), - name_resolution_ctx, - async_sourcer.schema.clone(), - *start, - ); - AsyncSourcer::create(self.ctx.clone(), output, inner) - } - } - }, - 1, - )?; + // self.main_pipeline.add_source( + // |output| { + // let name_resolution_ctx = NameResolutionContext::try_from(self.settings.as_ref())?; + // match &async_sourcer.source { + // InsertValue::Values { rows } => { + // let inner = ValueSource::new(rows.clone(), async_sourcer.schema.clone()); + // AsyncSourcer::create(self.ctx.clone(), output, inner) + // } + // InsertValue::RawValues { data, start } => { + // let inner = RawValueSource::new( + // data.clone(), + // self.ctx.clone(), + // name_resolution_ctx, + // async_sourcer.schema.clone(), + // *start, + // ); + // AsyncSourcer::create(self.ctx.clone(), output, inner) + // } + // } + // }, + // 1, + // )?; Ok(()) } @@ -398,9 +398,9 @@ pub struct ValueSource { } impl ValueSource { - pub fn new(rows: Vec>, schema: DataSchemaRef) -> Self { + pub fn new(rows: Arc>>, schema: DataSchemaRef) -> Self { Self { - rows: Arc::new(rows), + rows, schema, is_finished: false, } @@ -442,7 +442,7 @@ impl AsyncSource for ValueSource { } pub struct RawValueSource { - data: String, + data: Arc, ctx: Arc, name_resolution_ctx: NameResolutionContext, bind_context: BindContext, @@ -454,12 +454,13 @@ pub struct RawValueSource { impl RawValueSource { pub fn new( - data: String, + data: Arc, ctx: Arc, name_resolution_ctx: NameResolutionContext, schema: DataSchemaRef, start: usize, ) -> Self { + println!("schema: {:?}", schema); let bind_context = BindContext::new(); let metadata = Arc::new(RwLock::new(Metadata::default())); diff --git a/src/query/service/src/pipelines/builders/builder_scan.rs b/src/query/service/src/pipelines/builders/builder_scan.rs index 2ba6f9ce135ef..3d7f6d21f4d8d 100644 --- a/src/query/service/src/pipelines/builders/builder_scan.rs +++ b/src/query/service/src/pipelines/builders/builder_scan.rs @@ -18,6 +18,7 @@ use databend_common_exception::Result; use databend_common_expression::DataBlock; use databend_common_functions::BUILTIN_FUNCTIONS; use databend_common_pipeline_core::processors::ProcessorPtr; +use databend_common_pipeline_sources::AsyncSourcer; use databend_common_pipeline_sources::OneBlockSource; use databend_common_pipeline_transforms::processors::TransformPipelineHelper; use databend_common_sql::evaluator::BlockOperator; @@ -26,9 +27,14 @@ use databend_common_sql::executor::physical_plans::CacheScan; use databend_common_sql::executor::physical_plans::ConstantTableScan; use databend_common_sql::executor::physical_plans::CteScan; use databend_common_sql::executor::physical_plans::ExpressionScan; +use databend_common_sql::executor::physical_plans::PhysicalValueScan; use databend_common_sql::executor::physical_plans::TableScan; +use databend_common_sql::executor::physical_plans::Values; use databend_common_sql::plans::CacheSource; +use databend_common_sql::NameResolutionContext; +use 
super::RawValueSource; +use super::ValueSource; use crate::pipelines::processors::transforms::CacheSourceState; use crate::pipelines::processors::transforms::HashJoinCacheState; use crate::pipelines::processors::transforms::MaterializedCteSource; @@ -163,4 +169,33 @@ impl PipelineBuilder { Ok(()) } + + pub(crate) fn build_value_scan(&mut self, scan: &PhysicalValueScan) -> Result<()> { + match &scan.values { + Values::Values(rows) => self.main_pipeline.add_source( + |output| { + let inner = ValueSource::new(rows.clone(), scan.output_schema.clone()); + AsyncSourcer::create(self.ctx.clone(), output, inner) + }, + 1, + ), + Values::RawValues { rest_str, start } => self.main_pipeline.add_source( + |output| { + let name_resolution_ctx = NameResolutionContext { + deny_column_reference: true, + ..Default::default() + }; + let inner = RawValueSource::new( + rest_str.clone(), + self.ctx.clone(), + name_resolution_ctx, + scan.output_schema.clone(), + *start, + ); + AsyncSourcer::create(self.ctx.clone(), output, inner) + }, + 1, + ), + } + } } diff --git a/src/query/service/src/pipelines/pipeline_builder.rs b/src/query/service/src/pipelines/pipeline_builder.rs index 653324ffbdab3..4b4ba961a9b9e 100644 --- a/src/query/service/src/pipelines/pipeline_builder.rs +++ b/src/query/service/src/pipelines/pipeline_builder.rs @@ -256,6 +256,7 @@ impl PipelineBuilder { PhysicalPlan::ColumnMutation(column_mutation) => { self.build_column_mutation(column_mutation) } + PhysicalPlan::ValueScan(value_scan) => self.build_value_scan(value_scan), }?; self.is_exchange_neighbor = is_exchange_neighbor; diff --git a/src/query/service/src/pipelines/processors/transforms/transform_recursive_cte_source.rs b/src/query/service/src/pipelines/processors/transforms/transform_recursive_cte_source.rs index a0e9c43d3b29a..902c8eb42ce74 100644 --- a/src/query/service/src/pipelines/processors/transforms/transform_recursive_cte_source.rs +++ b/src/query/service/src/pipelines/processors/transforms/transform_recursive_cte_source.rs @@ -349,6 +349,7 @@ async fn create_memory_table_for_cte_scan( | PhysicalPlan::ChunkFillAndReorder(_) | PhysicalPlan::ChunkAppendData(_) | PhysicalPlan::ChunkMerge(_) + | PhysicalPlan::ValueScan(_) | PhysicalPlan::ChunkCommitInsert(_) => {} } Ok(()) diff --git a/src/query/service/src/schedulers/fragments/fragmenter.rs b/src/query/service/src/schedulers/fragments/fragmenter.rs index c00a11e7403d2..f3ad2db19de48 100644 --- a/src/query/service/src/schedulers/fragments/fragmenter.rs +++ b/src/query/service/src/schedulers/fragments/fragmenter.rs @@ -19,7 +19,6 @@ use databend_common_exception::Result; use databend_common_sql::executor::physical_plans::CompactSource; use databend_common_sql::executor::physical_plans::ConstantTableScan; use databend_common_sql::executor::physical_plans::CopyIntoTable; -use databend_common_sql::executor::physical_plans::CopyIntoTableSource; use databend_common_sql::executor::physical_plans::Exchange; use databend_common_sql::executor::physical_plans::ExchangeSink; use databend_common_sql::executor::physical_plans::ExchangeSource; @@ -179,21 +178,13 @@ impl PhysicalPlanReplacer for Fragmenter { }))) } - // TODO(Sky): remove redundant code fn replace_copy_into_table(&mut self, plan: &CopyIntoTable) -> Result { - match &plan.source { - CopyIntoTableSource::Stage(_) => { - self.state = State::SelectLeaf; - Ok(PhysicalPlan::CopyIntoTable(Box::new(plan.clone()))) - } - CopyIntoTableSource::Query(query_physical_plan) => { - let input = self.replace(query_physical_plan)?; - 
-            Ok(PhysicalPlan::CopyIntoTable(Box::new(CopyIntoTable {
-                source: CopyIntoTableSource::Query(Box::new(input)),
-                ..plan.clone()
-            })))
-            }
-        }
+        let input = self.replace(&plan.input)?;
+        Ok(PhysicalPlan::CopyIntoTable(Box::new(CopyIntoTable {
+            plan_id: plan.plan_id,
+            input: Box::new(input),
+            ..plan.clone()
+        })))
     }
 
     fn replace_recluster(&mut self, plan: &Recluster) -> Result<PhysicalPlan> {
diff --git a/src/query/service/src/schedulers/fragments/plan_fragment.rs b/src/query/service/src/schedulers/fragments/plan_fragment.rs
index db415a5bc29c9..2884dc4dea27a 100644
--- a/src/query/service/src/schedulers/fragments/plan_fragment.rs
+++ b/src/query/service/src/schedulers/fragments/plan_fragment.rs
@@ -28,7 +28,6 @@ use databend_common_settings::ReplaceIntoShuffleStrategy;
 use databend_common_sql::executor::physical_plans::CompactSource;
 use databend_common_sql::executor::physical_plans::ConstantTableScan;
 use databend_common_sql::executor::physical_plans::CopyIntoTable;
-use databend_common_sql::executor::physical_plans::CopyIntoTableSource;
 use databend_common_sql::executor::physical_plans::MutationSource;
 use databend_common_sql::executor::physical_plans::Recluster;
 use databend_common_sql::executor::physical_plans::ReplaceDeduplicate;
@@ -536,22 +535,12 @@ impl PhysicalPlanReplacer for ReplaceReadSource {
     }
 
     fn replace_copy_into_table(&mut self, plan: &CopyIntoTable) -> Result<PhysicalPlan> {
-        match &plan.source {
-            CopyIntoTableSource::Query(query_physical_plan) => {
-                let input = self.replace(query_physical_plan)?;
-                Ok(PhysicalPlan::CopyIntoTable(Box::new(CopyIntoTable {
-                    source: CopyIntoTableSource::Query(Box::new(input)),
-                    ..plan.clone()
-                })))
-            }
-            CopyIntoTableSource::Stage(v) => {
-                let input = self.replace(v)?;
-                Ok(PhysicalPlan::CopyIntoTable(Box::new(CopyIntoTable {
-                    source: CopyIntoTableSource::Stage(Box::new(input)),
-                    ..plan.clone()
-                })))
-            }
-        }
+        let input = self.replace(&plan.input)?;
+        Ok(PhysicalPlan::CopyIntoTable(Box::new(CopyIntoTable {
+            plan_id: plan.plan_id,
+            input: Box::new(input),
+            ..plan.clone()
+        })))
     }
 }
 
diff --git a/src/query/sql/src/executor/format.rs b/src/query/sql/src/executor/format.rs
index 3ece5cf2e064b..b1568fb2b2554 100644
--- a/src/query/sql/src/executor/format.rs
+++ b/src/query/sql/src/executor/format.rs
@@ -485,6 +485,9 @@ fn to_format_tree(
             ))
         }
         PhysicalPlan::AsyncFunction(plan) => async_function_to_format_tree(plan, metadata, profs),
+        PhysicalPlan::ValueScan(plan) => {
+            Ok(FormatTreeNode::new(format!("ValueScan: {}", plan.plan_id)))
+        }
     }
 }
 
diff --git a/src/query/sql/src/executor/physical_plan.rs b/src/query/sql/src/executor/physical_plan.rs
index 4bbe18f59e0b7..1d97fd0d38d31 100644
--- a/src/query/sql/src/executor/physical_plan.rs
+++ b/src/query/sql/src/executor/physical_plan.rs
@@ -27,6 +27,7 @@ use super::physical_plans::MutationManipulate;
 use super::physical_plans::MutationOrganize;
 use super::physical_plans::MutationSource;
 use super::physical_plans::MutationSplit;
+use super::physical_plans::PhysicalValueScan;
 use crate::executor::physical_plans::AggregateExpand;
@@ -45,7 +46,6 @@ use crate::executor::physical_plans::CompactSource;
 use crate::executor::physical_plans::ConstantTableScan;
 use crate::executor::physical_plans::CopyIntoLocation;
 use crate::executor::physical_plans::CopyIntoTable;
-use crate::executor::physical_plans::CopyIntoTableSource;
 use crate::executor::physical_plans::CteScan;
 use crate::executor::physical_plans::DistributedInsertSelect;
 use crate::executor::physical_plans::Duplicate;
@@ -111,6 +111,7 @@ pub enum PhysicalPlan {
     /// Copy into table
     CopyIntoTable(Box<CopyIntoTable>),
+    ValueScan(Box<PhysicalValueScan>),
     CopyIntoLocation(Box<CopyIntoLocation>),
 
     /// Replace
@@ -289,10 +290,11 @@ impl PhysicalPlan {
             PhysicalPlan::CopyIntoTable(plan) => {
                 plan.plan_id = *next_id;
                 *next_id += 1;
-                match &mut plan.source {
-                    CopyIntoTableSource::Query(input) => input.adjust_plan_id(next_id),
-                    CopyIntoTableSource::Stage(input) => input.adjust_plan_id(next_id),
-                };
+                plan.input.adjust_plan_id(next_id);
+            }
+            PhysicalPlan::ValueScan(plan) => {
+                plan.plan_id = *next_id;
+                *next_id += 1;
             }
             PhysicalPlan::CopyIntoLocation(plan) => {
                 plan.plan_id = *next_id;
@@ -462,6 +464,7 @@ impl PhysicalPlan {
             PhysicalPlan::ChunkMerge(v) => v.plan_id,
             PhysicalPlan::ChunkCommitInsert(v) => v.plan_id,
             PhysicalPlan::RecursiveCteScan(v) => v.plan_id,
+            PhysicalPlan::ValueScan(v) => v.plan_id,
         }
     }
 
@@ -518,6 +521,7 @@ impl PhysicalPlan {
             PhysicalPlan::ChunkAppendData(_) => todo!(),
             PhysicalPlan::ChunkMerge(_) => todo!(),
             PhysicalPlan::ChunkCommitInsert(_) => todo!(),
+            PhysicalPlan::ValueScan(plan) => plan.output_schema(),
         }
     }
 
@@ -580,6 +584,7 @@ impl PhysicalPlan {
             PhysicalPlan::ChunkAppendData(_) => "WriteData".to_string(),
             PhysicalPlan::ChunkMerge(_) => "ChunkMerge".to_string(),
             PhysicalPlan::ChunkCommitInsert(_) => "Commit".to_string(),
+            PhysicalPlan::ValueScan(_) => "ValueScan".to_string(),
         }
     }
 
@@ -593,6 +598,7 @@ impl PhysicalPlan {
             | PhysicalPlan::CompactSource(_)
             | PhysicalPlan::ReplaceAsyncSourcer(_)
             | PhysicalPlan::Recluster(_)
+            | PhysicalPlan::ValueScan(_)
             | PhysicalPlan::RecursiveCteScan(_) => Box::new(std::iter::empty()),
             PhysicalPlan::Filter(plan) => Box::new(std::iter::once(plan.input.as_ref())),
             PhysicalPlan::EvalScalar(plan) => Box::new(std::iter::once(plan.input.as_ref())),
@@ -651,10 +657,7 @@ impl PhysicalPlan {
             PhysicalPlan::ChunkAppendData(plan) => Box::new(std::iter::once(plan.input.as_ref())),
             PhysicalPlan::ChunkMerge(plan) => Box::new(std::iter::once(plan.input.as_ref())),
             PhysicalPlan::ChunkCommitInsert(plan) => Box::new(std::iter::once(plan.input.as_ref())),
-            PhysicalPlan::CopyIntoTable(v) => match &v.source {
-                CopyIntoTableSource::Query(v) => Box::new(std::iter::once(v.as_ref())),
-                CopyIntoTableSource::Stage(v) => Box::new(std::iter::once(v.as_ref())),
-            },
+            PhysicalPlan::CopyIntoTable(v) => Box::new(std::iter::once(v.input.as_ref())),
         }
     }
 
@@ -711,6 +714,7 @@ impl PhysicalPlan {
             | PhysicalPlan::ChunkFillAndReorder(_)
             | PhysicalPlan::ChunkAppendData(_)
             | PhysicalPlan::ChunkMerge(_)
+            | PhysicalPlan::ValueScan(_)
             | PhysicalPlan::ChunkCommitInsert(_) => None,
         }
     }
diff --git a/src/query/sql/src/executor/physical_plan_builder.rs b/src/query/sql/src/executor/physical_plan_builder.rs
index e386d1d4b5d3d..1e519da935538 100644
--- a/src/query/sql/src/executor/physical_plan_builder.rs
+++ b/src/query/sql/src/executor/physical_plan_builder.rs
@@ -142,6 +142,10 @@ impl PhysicalPlanBuilder {
             }
             RelOperator::Recluster(recluster) => self.build_recluster(recluster).await,
             RelOperator::CompactBlock(compact) => self.build_compact_block(compact).await,
+            RelOperator::CopyIntoTable(copy_into_table) => {
+                self.build_copy_into_table(s_expr, copy_into_table).await
+            }
+            RelOperator::ValueScan(value_scan) => self.build_value_scan(value_scan).await,
         }
     }
diff --git a/src/query/sql/src/executor/physical_plan_visitor.rs b/src/query/sql/src/executor/physical_plan_visitor.rs
index 6dcab582c7f34..5c6b91a4bce06 100644
--- a/src/query/sql/src/executor/physical_plan_visitor.rs
+++ b/src/query/sql/src/executor/physical_plan_visitor.rs
@@ -20,6 +20,7 @@ use super::physical_plans::ExpressionScan;
 use super::physical_plans::MutationManipulate;
 use super::physical_plans::MutationOrganize;
 use super::physical_plans::MutationSplit;
+use super::physical_plans::PhysicalValueScan;
 use super::physical_plans::RecursiveCteScan;
 use crate::executor::physical_plan::PhysicalPlan;
 use crate::executor::physical_plans::AggregateExpand;
@@ -39,7 +40,6 @@ use crate::executor::physical_plans::CompactSource;
 use crate::executor::physical_plans::ConstantTableScan;
 use crate::executor::physical_plans::CopyIntoLocation;
 use crate::executor::physical_plans::CopyIntoTable;
-use crate::executor::physical_plans::CopyIntoTableSource;
 use crate::executor::physical_plans::CteScan;
 use crate::executor::physical_plans::DistributedInsertSelect;
 use crate::executor::physical_plans::Duplicate;
@@ -122,9 +122,14 @@ pub trait PhysicalPlanReplacer {
             PhysicalPlan::ChunkAppendData(plan) => self.replace_chunk_append_data(plan),
             PhysicalPlan::ChunkMerge(plan) => self.replace_chunk_merge(plan),
             PhysicalPlan::ChunkCommitInsert(plan) => self.replace_chunk_commit_insert(plan),
+            PhysicalPlan::ValueScan(plan) => self.replace_value_scan(plan),
         }
     }
 
+    fn replace_value_scan(&mut self, plan: &PhysicalValueScan) -> Result<PhysicalPlan> {
+        Ok(PhysicalPlan::ValueScan(Box::new(plan.clone())))
+    }
+
     fn replace_recluster(&mut self, plan: &Recluster) -> Result<PhysicalPlan> {
         Ok(PhysicalPlan::Recluster(Box::new(plan.clone())))
     }
@@ -399,18 +404,13 @@
     }
 
     fn replace_copy_into_table(&mut self, plan: &CopyIntoTable) -> Result<PhysicalPlan> {
-        match &plan.source {
-            CopyIntoTableSource::Stage(_) => {
-                Ok(PhysicalPlan::CopyIntoTable(Box::new(plan.clone())))
-            }
-            CopyIntoTableSource::Query(query_physical_plan) => {
-                let input = self.replace(query_physical_plan)?;
-                Ok(PhysicalPlan::CopyIntoTable(Box::new(CopyIntoTable {
-                    source: CopyIntoTableSource::Query(Box::new(input)),
-                    ..plan.clone()
-                })))
-            }
-        }
+        let input = self.replace(&plan.input)?;
+
+        Ok(PhysicalPlan::CopyIntoTable(Box::new(CopyIntoTable {
+            plan_id: plan.plan_id,
+            input: Box::new(input),
+            ..plan.clone()
+        })))
     }
 
     fn replace_copy_into_location(&mut self, plan: &CopyIntoLocation) -> Result<PhysicalPlan> {
@@ -663,6 +663,7 @@ impl PhysicalPlan {
             | PhysicalPlan::Recluster(_)
             | PhysicalPlan::ExchangeSource(_)
             | PhysicalPlan::CompactSource(_)
+            | PhysicalPlan::ValueScan(_)
            | PhysicalPlan::MutationSource(_) => {}
             PhysicalPlan::Filter(plan) => {
                 Self::traverse(&plan.input, pre_visit, visit, post_visit);
             }
@@ -714,14 +715,9 @@
             PhysicalPlan::ProjectSet(plan) => {
                 Self::traverse(&plan.input, pre_visit, visit, post_visit)
             }
-            PhysicalPlan::CopyIntoTable(plan) => match &plan.source {
-                CopyIntoTableSource::Query(input) => {
-                    Self::traverse(input, pre_visit, visit, post_visit);
-                }
-                CopyIntoTableSource::Stage(input) => {
-                    Self::traverse(input, pre_visit, visit, post_visit);
-                }
-            },
+            PhysicalPlan::CopyIntoTable(plan) => {
+                Self::traverse(&plan.input, pre_visit, visit, post_visit)
+            }
             PhysicalPlan::CopyIntoLocation(plan) => {
                 Self::traverse(&plan.input, pre_visit, visit, post_visit)
             }
diff --git a/src/query/sql/src/executor/physical_plans/common.rs b/src/query/sql/src/executor/physical_plans/common.rs
index 2670ca5e93eae..8ea71625bd1e6 100644
--- a/src/query/sql/src/executor/physical_plans/common.rs
+++ b/src/query/sql/src/executor/physical_plans/common.rs
@@ -71,13 +71,14 @@ pub enum FragmentKind {
     Merge,
 }
 
-#[derive(Clone, Debug, serde::Serialize, serde::Deserialize, Copy)]
+#[derive(Clone, Debug, serde::Serialize, serde::Deserialize, Copy, PartialEq, Eq)]
 pub enum MutationKind {
     Delete,
     Update,
     Replace,
     Recluster,
     Insert,
+    CopyInto,
     Compact,
     MergeInto,
 }
@@ -92,6 +93,7 @@ impl Display for MutationKind {
             MutationKind::Replace => write!(f, "Replace"),
             MutationKind::Compact => write!(f, "Compact"),
             MutationKind::MergeInto => write!(f, "MergeInto"),
+            MutationKind::CopyInto => write!(f, "CopyInto"),
         }
     }
 }
diff --git a/src/query/sql/src/executor/physical_plans/mod.rs b/src/query/sql/src/executor/physical_plans/mod.rs
index f8b1c86702d72..db149b7fd768d 100644
--- a/src/query/sql/src/executor/physical_plans/mod.rs
+++ b/src/query/sql/src/executor/physical_plans/mod.rs
@@ -56,6 +56,7 @@ mod physical_sort;
 mod physical_table_scan;
 mod physical_udf;
 mod physical_union_all;
+mod physical_value_scan;
 mod physical_window;
 
 pub use common::*;
@@ -106,5 +107,6 @@ pub use physical_table_scan::TableScan;
 pub use physical_udf::Udf;
 pub use physical_udf::UdfFunctionDesc;
 pub use physical_union_all::UnionAll;
+pub use physical_value_scan::*;
 pub use physical_window::*;
 pub use physical_window_partition::WindowPartition;
diff --git a/src/query/sql/src/executor/physical_plans/physical_copy_into_table.rs b/src/query/sql/src/executor/physical_plans/physical_copy_into_table.rs
index 1fd92f5c3ce57..0e10c7fb01459 100644
--- a/src/query/sql/src/executor/physical_plans/physical_copy_into_table.rs
+++ b/src/query/sql/src/executor/physical_plans/physical_copy_into_table.rs
@@ -12,40 +12,25 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use databend_common_catalog::plan::StageTableInfo;
 use databend_common_exception::Result;
 use databend_common_expression::DataSchemaRef;
 use databend_common_expression::DataSchemaRefExt;
 use databend_common_expression::Scalar;
 use databend_common_meta_app::schema::TableInfo;
-use enum_as_inner::EnumAsInner;
 
 use crate::executor::physical_plan::PhysicalPlan;
-use crate::plans::CopyIntoTableMode;
-use crate::plans::ValidationMode;
 use crate::ColumnBinding;
 
 #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
 pub struct CopyIntoTable {
     pub plan_id: u32,
+    pub input: Box<PhysicalPlan>,
     pub required_values_schema: DataSchemaRef,
     pub values_consts: Vec<Scalar>,
     pub required_source_schema: DataSchemaRef,
-    pub write_mode: CopyIntoTableMode,
-    pub validation_mode: ValidationMode,
-    pub stage_table_info: StageTableInfo,
     pub table_info: TableInfo,
-    pub project_columns: Option<Vec<ColumnBinding>>,
-    pub source: CopyIntoTableSource,
-    pub is_transform: bool,
-}
-
-#[derive(Clone, Debug, serde::Serialize, serde::Deserialize, EnumAsInner)]
-pub enum CopyIntoTableSource {
-    Query(Box<PhysicalPlan>),
-    Stage(Box<PhysicalPlan>),
 }
 
 impl CopyIntoTable {
diff --git a/src/query/sql/src/executor/physical_plans/physical_value_scan.rs b/src/query/sql/src/executor/physical_plans/physical_value_scan.rs
new file mode 100644
index 0000000000000..1eec0606a4bae
--- /dev/null
+++ b/src/query/sql/src/executor/physical_plans/physical_value_scan.rs
@@ -0,0 +1,38 @@
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use databend_common_exception::Result;
+use databend_common_expression::DataSchemaRef;
+use databend_common_expression::Scalar;
+
+#[derive(Clone, Debug, serde::Serialize, serde::Deserialize, PartialEq, Eq, Hash)]
+pub enum Values {
+    Values(Arc<Vec<Vec<Scalar>>>),
+    RawValues { rest_str: Arc<String>, start: usize },
+}
+
+#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
+pub struct PhysicalValueScan {
+    pub plan_id: u32,
+    pub values: Values,
+    pub output_schema: DataSchemaRef,
+}
+
+impl PhysicalValueScan {
+    pub fn output_schema(&self) -> Result<DataSchemaRef> {
+        Ok(self.output_schema.clone())
+    }
+}
diff --git a/src/query/sql/src/planner/binder/binder.rs b/src/query/sql/src/planner/binder/binder.rs
index f893894cd03f5..0646164cbf2fd 100644
--- a/src/query/sql/src/planner/binder/binder.rs
+++ b/src/query/sql/src/planner/binder/binder.rs
@@ -401,8 +401,7 @@ impl<'a> Binder {
                         warn!("In INSERT resolve optimize hints {:?} failed, err: {:?}", hints, e);
                     }
                 }
-                // self.bind_insert(bind_context, stmt).await?
-                todo!()
+                self.bind_insert(bind_context, stmt).await?
             }
             Statement::InsertMultiTable(stmt) => {
                 self.bind_insert_multi_table(bind_context, stmt).await?
diff --git a/src/query/sql/src/planner/binder/copy_into_table.rs b/src/query/sql/src/planner/binder/copy_into_table.rs
index f66a171c24d45..50e39b617abb0 100644
--- a/src/query/sql/src/planner/binder/copy_into_table.rs
+++ b/src/query/sql/src/planner/binder/copy_into_table.rs
@@ -67,11 +67,11 @@ use parking_lot::RwLock;
 use crate::binder::bind_query::MaxColumnPosition;
 use crate::binder::location::parse_uri_location;
 use crate::binder::Binder;
+use crate::executor::physical_plans::MutationKind;
 use crate::optimizer::SExpr;
-use crate::plans::AppendSource;
-use crate::plans::CopyIntoTableMode;
 use crate::plans::CopyIntoTablePlan;
 use crate::plans::Plan;
+use crate::plans::StageContext;
 use crate::BindContext;
 use crate::Metadata;
 use crate::NameResolutionContext;
@@ -84,35 +84,49 @@ impl<'a> Binder {
         bind_context: &mut BindContext,
         stmt: &CopyIntoTableStmt,
     ) -> Result<Plan> {
-        // match &stmt.src {
-        //     CopyIntoTableSource::Location(location) => {
-        //         let mut plan = self
-        //             .bind_copy_into_table_common(bind_context, stmt, location)
-        //             .await?;
-
-        //         // for copy from location, collect files explicitly
-        //         plan.collect_files(self.ctx.as_ref()).await?;
-        //         self.bind_copy_into_table_from_location(bind_context, plan)
-        //             .await
-        //     }
-        //     CopyIntoTableSource::Query(query) => {
-        //         self.init_cte(bind_context, &stmt.with)?;
-
-        //         let mut max_column_position = MaxColumnPosition::new();
-        //         query.drive(&mut max_column_position);
-        //         self.metadata
-        //             .write()
-        //             .set_max_column_position(max_column_position.max_pos);
-        //         let (select_list, location, alias) = check_transform_query(query)?;
-        //         let plan = self
-        //             .bind_copy_into_table_common(bind_context, stmt, location)
-        //             .await?;
-
-        //         self.bind_copy_from_query_into_table(bind_context, plan, select_list, alias)
-        //             .await
-        //     }
-        // }
-        todo!()
+        match &stmt.src {
+            CopyIntoTableSource::Location(location) => {
+                let (copy_into_table_plan, mut stage_table_info) = self
+                    .bind_copy_into_table_common(bind_context, stmt, location)
+                    .await?;
+                copy_into_table_plan
+                    .collect_files(self.ctx.as_ref(), &mut stage_table_info, stmt.force)
+                    .await?;
+                self.bind_copy_into_table_from_location(
+                    bind_context,
+                    copy_into_table_plan,
+                    stage_table_info,
+                    stmt.force,
+                )
+                .await
+            }
+            CopyIntoTableSource::Query(query) => {
+                self.init_cte(bind_context, &stmt.with)?;
+
+                let mut max_column_position = MaxColumnPosition::new();
+                query.drive(&mut max_column_position);
+                self.metadata
+                    .write()
+                    .set_max_column_position(max_column_position.max_pos);
+                let (select_list, location, alias) = check_transform_query(query)?;
+                let (copy_into_table_plan, mut stage_table_info) = self
+                    .bind_copy_into_table_common(bind_context, stmt, location)
+                    .await?;
+
+                copy_into_table_plan
+                    .collect_files(self.ctx.as_ref(), &mut stage_table_info, stmt.force)
+                    .await?;
+                self.bind_copy_from_query_into_table(
+                    bind_context,
+                    copy_into_table_plan,
+                    stage_table_info,
+                    select_list,
+                    alias,
+                    stmt.force,
+                )
+                .await
+            }
+        }
     }
 
     pub(crate) fn resolve_copy_pattern(
@@ -140,7 +154,7 @@ impl<'a> Binder {
         bind_context: &mut BindContext,
         stmt: &CopyIntoTableStmt,
         location: &FileLocation,
-    ) -> Result<CopyIntoTablePlan> {
+    ) -> Result<(CopyIntoTablePlan, StageTableInfo)> {
         let (catalog_name, database_name, table_name) = self.normalize_object_identifier_triple(
             &stmt.dst.catalog,
             &stmt.dst.database,
@@ -182,31 +196,27 @@
         } else {
             None
         };
-        // source: crate::plans::AppendSource::Stage(Box::new(StageTableInfo {
-        //     schema: stage_schema,
-        //     files_info,
-        //     stage_info,
-        //     files_to_copy: None,
-        //     duplicated_files_detected: vec![],
-        //     is_select: false,
-        //     default_values,
-        //     copy_into_location_options: Default::default(),
-        // })),

-        // let source = SExpr::create_leaf(Arc::new(plan));
-        // let copy_into = CopyIntoTablePlan {
-        //     catalog_name,
-        //     database_name,
-        //     table_name,
-        //     force: stmt.force,
-        //     values_consts: vec![],
-        //     required_source_schema: required_values_schema.clone(),
-        //     required_values_schema: required_values_schema.clone(),
-        // };
-        // let copy_into_table = SExpr::create_unary(Arc::new(plan), source);
-
-        // Ok()
-        todo!()
+        let stage_table_info = StageTableInfo {
+            schema: stage_schema,
+            files_info,
+            stage_info,
+            files_to_copy: None,
+            duplicated_files_detected: vec![],
+            is_select: false,
+            default_values,
+            copy_into_location_options: Default::default(),
+        };
+        let copy_into_plan = CopyIntoTablePlan {
+            catalog_name,
+            database_name,
+            table_name,
+            values_consts: vec![],
+            required_source_schema: required_values_schema.clone(),
+            required_values_schema: required_values_schema.clone(),
+            project_columns: None,
+            mutation_kind: MutationKind::CopyInto,
+        };
+        Ok((copy_into_plan, stage_table_info))
     }
 
     /// Bind COPY INTO <table> FROM <stage location>
@@ -214,48 +224,81 @@
    async fn bind_copy_into_table_from_location(
         &mut self,
         bind_ctx: &BindContext,
-        plan: CopyIntoTablePlan,
+        copy_into_table_plan: CopyIntoTablePlan,
+        stage_table_info: StageTableInfo,
+        force: bool,
     ) -> Result<Plan> {
-        // let use_query = matches!(&plan.source.as_stage().unwrap().stage_info.file_format_params,
-        //     FileFormatParams::Parquet(fmt) if fmt.missing_field_as == NullAs::Error);
-
-        // if use_query {
-        //     let mut select_list = Vec::with_capacity(plan.required_source_schema.num_fields());
-        //     for dest_field in plan.required_source_schema.fields().iter() {
-        //         let column = Expr::ColumnRef {
-        //             span: None,
-        //             column: ColumnRef {
-        //                 database: None,
-        //                 table: None,
-        //                 column: AstColumnID::Name(Identifier::from_name(
-        //                     None,
-        //                     dest_field.name().to_string(),
-        //                 )),
-        //             },
-        //         };
-        //         // cast types to variant, tuple will be rewrite as `json_object_keep_null`
-        //         let expr = if dest_field.data_type().remove_nullable() == DataType::Variant {
-        //             Expr::Cast {
-        //                 span: None,
-        //                 expr: Box::new(column),
-        //                 target_type: TypeName::Variant,
-        //                 pg_style: false,
-        //             }
-        //         } else {
-        //             column
-        //         };
-        //         select_list.push(SelectTarget::AliasedExpr {
-        //             expr: Box::new(expr),
-        //             alias: None,
-        //         });
-        //     }
-
-        //     self.bind_copy_from_query_into_table(bind_ctx, plan, &select_list, &None)
-        //         .await
-        // } else {
-        //     Ok(Plan::CopyIntoTable(Box::new(plan)))
-        // }
-        todo!()
+        let use_query = matches!(&stage_table_info.stage_info.file_format_params,
+            FileFormatParams::Parquet(fmt) if fmt.missing_field_as == NullAs::Error);
+
+        if use_query {
+            let mut select_list =
+                Vec::with_capacity(copy_into_table_plan.required_source_schema.num_fields());
+            for dest_field in copy_into_table_plan.required_source_schema.fields().iter() {
+                let column = Expr::ColumnRef {
+                    span: None,
+                    column: ColumnRef {
+                        database: None,
+                        table: None,
+                        column: AstColumnID::Name(Identifier::from_name(
+                            None,
+                            dest_field.name().to_string(),
+                        )),
+                    },
+                };
+                // cast types to variant, tuple will be rewritten as `json_object_keep_null`
+                let expr = if dest_field.data_type().remove_nullable() == DataType::Variant {
+                    Expr::Cast {
+                        span: None,
+                        expr: Box::new(column),
+                        target_type: TypeName::Variant,
+                        pg_style: false,
+                    }
+                } else {
+                    column
+                };
+                select_list.push(SelectTarget::AliasedExpr {
+                    expr: Box::new(expr),
+                    alias: None,
+                });
+            }
+
+            self.bind_copy_from_query_into_table(
+                bind_ctx,
+                copy_into_table_plan,
+                stage_table_info,
+                &select_list,
+                &None,
+                force,
+            )
+            .await
+        } else {
+            let (scan, _) = self
+                .bind_stage_table(
+                    self.ctx.clone(),
+                    bind_ctx,
+                    stage_table_info.stage_info.clone(),
+                    stage_table_info.files_info.clone(),
+                    &None,
+                    stage_table_info.files_to_copy.clone(),
+                )
+                .await?;
+
+            let copy_into =
+                SExpr::create_unary(Arc::new(copy_into_table_plan.into()), Arc::new(scan));
+            Ok(Plan::CopyIntoTable {
+                s_expr: Box::new(copy_into),
+                metadata: self.metadata.clone(),
+                stage_context: Some(Box::new(StageContext {
+                    purge: stage_table_info.stage_info.copy_options.purge,
+                    force,
+                    files_to_copy: stage_table_info.files_to_copy.unwrap_or_default(),
+                    duplicated_files_detected: stage_table_info.duplicated_files_detected,
+                    stage_info: stage_table_info.stage_info,
+                })),
+                overwrite: false,
+            })
+        }
     }
 
     #[async_backtrace::framed]
@@ -308,7 +351,6 @@
         table_name: String,
         required_values_schema: DataSchemaRef,
         values_str: &str,
-        write_mode: CopyIntoTableMode,
     ) -> Result<Plan> {
         let (data_schema, const_columns) = if values_str.is_empty() {
             (required_values_schema.clone(), vec![])
@@ -329,7 +371,7 @@
         // as the vanilla Copy-Into does.
         // thus, we do not care about the "duplicated_files_detected", just set it to empty vector.
         let files_to_copy = list_stage_files(&stage_info, &files_info, thread_num, None).await?;
-        // let duplicated_files_detected = vec![];
+        let duplicated_files_detected = vec![];
 
         let stage_schema = infer_table_schema(&data_schema)?;
 
@@ -337,31 +379,35 @@
             .prepare_default_values(bind_context, &data_schema)
             .await?;
 
-        // let plan = CopyIntoTablePlan {
-        //     catalog_name,
-        //     database_name,
-        //     table_name,
-        //     no_file_to_copy: false,
-        //     required_source_schema: data_schema.clone(),
-        //     required_values_schema,
-        //     values_consts: const_columns,
-        //     force: true,
-        //     source: crate::plans::AppendSource::Stage(Box::new(StageTableInfo {
-        //         schema: stage_schema,
-        //         files_info,
-        //         stage_info,
-        //         files_to_copy: Some(files_to_copy),
-        //         duplicated_files_detected,
-        //         is_select: false,
-        //         default_values: Some(default_values),
-        //         copy_into_location_options: Default::default(),
-        //     })),
-        //     enable_distributed: false,
-        // };
-
-        // self.bind_copy_into_table_from_location(bind_context, plan)
-        //     .await
-        todo!()
+        let stage_table_info = StageTableInfo {
+            schema: stage_schema,
+            files_info,
+            stage_info,
+            files_to_copy: Some(files_to_copy),
+            duplicated_files_detected,
+            is_select: false,
+            default_values: Some(default_values),
+            copy_into_location_options: Default::default(),
+        };
+
+        let copy_into_table_plan = CopyIntoTablePlan {
+            catalog_name,
+            database_name,
+            table_name,
+            required_values_schema,
+            values_consts: const_columns,
+            required_source_schema: data_schema.clone(),
+            mutation_kind: MutationKind::Insert,
+            project_columns: None,
+        };
+
+        self.bind_copy_into_table_from_location(
+            bind_context,
+            copy_into_table_plan,
+            stage_table_info,
+            true,
+        )
+        .await
     }
 
     /// Bind COPY INTO <table> FROM <query>
@@ -369,98 +415,100 @@
     async fn bind_copy_from_query_into_table(
         &mut self,
         bind_context: &BindContext,
-        mut plan: CopyIntoTablePlan,
+        mut copy_into_table_plan: CopyIntoTablePlan,
+        stage_table_info: StageTableInfo,
         select_list: &'a [SelectTarget],
         alias: &Option<TableAlias>,
+        force: bool,
     ) -> Result<Plan> {
-        // plan.collect_files(self.ctx.as_ref()).await?;
-        // if plan.no_file_to_copy {
-        //     return Ok(Plan::CopyIntoTable(Box::new(plan)));
-        // }
-
-        // let stage_table_info = plan.source.as_stage().unwrap();
-        // let table_ctx = self.ctx.clone();
-        // let (s_expr, mut from_context) = self
-        //     .bind_stage_table(
-        //         table_ctx,
-        //         bind_context,
-        //         stage_table_info.stage_info.clone(),
-        //         stage_table_info.files_info.clone(),
-        //         alias,
-        //         stage_table_info.files_to_copy.clone(),
-        //     )
-        //     .await?;
-
-        // // Generate an analyzed select list with from context
-        // let select_list = self.normalize_select_list(&mut from_context, select_list)?;
-
-        // for item in select_list.items.iter() {
-        //     if !self.check_allowed_scalar_expr_with_subquery_for_copy_table(&item.scalar)? {
-        //         // in fact, if there is a join, we will stop in `check_transform_query()`
-        //         return Err(ErrorCode::SemanticError(
-        //             "copy into table source can't contain window|aggregate|join functions"
-        //                 .to_string(),
-        //         ));
-        //     };
-        // }
-        // let (scalar_items, projections) = self.analyze_projection(
-        //     &from_context.aggregate_info,
-        //     &from_context.windows,
-        //     &select_list,
-        // )?;
-
-        // if projections.len() != plan.required_source_schema.num_fields() {
-        //     return Err(ErrorCode::BadArguments(format!(
-        //         "Number of columns in select list ({}) does not match that of the corresponding table ({})",
-        //         projections.len(),
-        //         plan.required_source_schema.num_fields(),
-        //     )));
-        // }
-
-        // let mut s_expr =
-        //     self.bind_projection(&mut from_context, &projections, &scalar_items, s_expr)?;
-
-        // // rewrite async function and udf
-        // s_expr = self.rewrite_udf(&mut from_context, s_expr)?;
-
-        // let mut output_context = BindContext::new();
-        // output_context.parent = from_context.parent;
-        // output_context.columns = from_context.columns;
-
-        // // disable variant check to allow copy invalid JSON into tables
-        // let disable_variant_check = stage_table_info
-        //     .stage_info
-        //     .copy_options
-        //     .disable_variant_check;
-        // if disable_variant_check {
-        //     let hints = Hint {
-        //         hints_list: vec![HintItem {
-        //             name: Identifier::from_name(None, "disable_variant_check"),
-        //             expr: Expr::Literal {
-        //                 span: None,
-        //                 value: Literal::UInt64(1),
-        //             },
-        //         }],
-        //     };
-        //     if let Some(e) = self.opt_hints_set_var(&mut output_context, &hints).err() {
-        //         warn!(
-        //             "In COPY resolve optimize hints {:?} failed, err: {:?}",
-        //             hints, e
-        //         );
-        //     }
-        // }
-
-        // plan.source = AppendSource::Query(Box::new(Plan::Query {
-        //     s_expr: Box::new(s_expr),
-        //     metadata: self.metadata.clone(),
-        //     bind_context: Box::new(output_context),
-        //     rewrite_kind: None,
-        //     ignore_result: false,
-        //     formatted_ast: None,
-        // }));
-
-        // Ok(Plan::CopyIntoTable(Box::new(plan)))
-        todo!()
+        let table_ctx = self.ctx.clone();
+        let (s_expr, mut from_context) = self
+            .bind_stage_table(
+                table_ctx,
+                bind_context,
+                stage_table_info.stage_info.clone(),
+                stage_table_info.files_info.clone(),
+                alias,
+                stage_table_info.files_to_copy.clone(),
+            )
+            .await?;
+
+        // Generate an analyzed select list with from context
+        let select_list = self.normalize_select_list(&mut from_context, select_list)?;
+
+        for item in select_list.items.iter() {
+            if !self.check_allowed_scalar_expr_with_subquery_for_copy_table(&item.scalar)? {
+                // in fact, if there is a join, we will stop in `check_transform_query()`
+                return Err(ErrorCode::SemanticError(
+                    "copy into table source can't contain window|aggregate|join functions"
+                        .to_string(),
+                ));
+            };
+        }
+        let (scalar_items, projections) = self.analyze_projection(
+            &from_context.aggregate_info,
+            &from_context.windows,
+            &select_list,
+        )?;
+
+        if projections.len() != copy_into_table_plan.required_source_schema.num_fields() {
+            return Err(ErrorCode::BadArguments(format!(
+                "Number of columns in select list ({}) does not match that of the corresponding table ({})",
+                projections.len(),
+                copy_into_table_plan.required_source_schema.num_fields(),
+            )));
+        }
+
+        copy_into_table_plan.project_columns = Some(projections.clone());
+
+        let mut s_expr =
+            self.bind_projection(&mut from_context, &projections, &scalar_items, s_expr)?;
+
+        // rewrite async function and udf
+        s_expr = self.rewrite_udf(&mut from_context, s_expr)?;
+
+        let mut output_context = BindContext::new();
+        output_context.parent = from_context.parent;
+        output_context.columns = from_context.columns;
+
+        // disable variant check to allow copy invalid JSON into tables
+        let disable_variant_check = stage_table_info
+            .stage_info
+            .copy_options
+            .disable_variant_check;
+        if disable_variant_check {
+            let hints = Hint {
+                hints_list: vec![HintItem {
+                    name: Identifier::from_name(None, "disable_variant_check"),
+                    expr: Expr::Literal {
+                        span: None,
+                        value: Literal::UInt64(1),
+                    },
+                }],
+            };
+            if let Some(e) = self.opt_hints_set_var(&mut output_context, &hints).err() {
+                warn!(
+                    "In COPY resolve optimize hints {:?} failed, err: {:?}",
+                    hints, e
+                );
+            }
+        }
+
+        let copy_into =
+            SExpr::create_unary(Arc::new(copy_into_table_plan.into()), Arc::new(s_expr));
+
+        Ok(Plan::CopyIntoTable {
+            s_expr: Box::new(copy_into),
+            metadata: self.metadata.clone(),
+            stage_context: Some(Box::new(StageContext {
+                purge: stage_table_info.stage_info.copy_options.purge,
+                force,
+                files_to_copy: stage_table_info.files_to_copy.unwrap_or_default(),
+                duplicated_files_detected: stage_table_info.duplicated_files_detected,
+                stage_info: stage_table_info.stage_info,
+            })),
+            overwrite: false,
+        })
     }
 
     #[async_backtrace::framed]
diff --git a/src/query/sql/src/planner/binder/insert.rs b/src/query/sql/src/planner/binder/insert.rs
index be0166e4b4353..9b948f82ee40c 100644
--- a/src/query/sql/src/planner/binder/insert.rs
+++ b/src/query/sql/src/planner/binder/insert.rs
@@ -17,22 +17,21 @@ use std::sync::Arc;
 use databend_common_ast::ast::Identifier;
 use databend_common_ast::ast::InsertSource;
 use databend_common_ast::ast::InsertStmt;
-use databend_common_ast::ast::Statement;
 use databend_common_exception::ErrorCode;
 use databend_common_exception::Result;
-use databend_common_expression::DataSchema;
+use databend_common_expression::DataSchemaRef;
 use databend_common_expression::TableSchema;
 use databend_common_expression::TableSchemaRefExt;
 
 use super::util::TableIdentifier;
 use crate::binder::Binder;
+use crate::executor::physical_plans::MutationKind;
+use crate::executor::physical_plans::Values;
 use crate::normalize_identifier;
-use crate::plans::CopyIntoTableMode;
+use crate::optimizer::SExpr;
 use crate::plans::CopyIntoTablePlan;
-use crate::plans::Insert;
-use crate::plans::InsertInputSource;
-use crate::plans::InsertValue;
 use crate::plans::Plan;
+use crate::plans::ValueScan;
 use crate::BindContext;
 
 impl Binder {
@@ -69,106 +68,8 @@ impl Binder {
         Ok(TableSchemaRefExt::create(fields))
     }
 
-    // #[async_backtrace::framed]
-    // pub(in crate::planner::binder) async fn bind_insert(
-    //     &mut self,
-    //     bind_context: &mut BindContext,
-    //     stmt: &InsertStmt,
-    // ) -> Result<Plan> {
-    //     let InsertStmt {
-    //         with,
-    //         catalog,
-    //         database,
-    //         table,
-    //         columns,
-    //         source,
-    //         overwrite,
-    //         ..
-    //     } = stmt;
-
-    //     self.init_cte(bind_context, with)?;
-
-    //     let table_identifier = TableIdentifier::new(self, catalog, database, table, &None);
-    //     let (catalog_name, database_name, table_name) = (
-    //         table_identifier.catalog_name(),
-    //         table_identifier.database_name(),
-    //         table_identifier.table_name(),
-    //     );
-
-    //     let table = self
-    //         .ctx
-    //         .get_table(&catalog_name, &database_name, &table_name)
-    //         .await
-    //         .map_err(|err| table_identifier.not_found_suggest_error(err))?;
-
-    //     let schema = self.schema_project(&table.schema(), columns)?;
-
-    //     let input_source: Result<InsertInputSource> = match source.clone() {
-    //         InsertSource::Values { rows } => {
-    //             let mut new_rows = Vec::with_capacity(rows.len());
-    //             for row in rows {
-    //                 let new_row = bind_context
-    //                     .exprs_to_scalar(
-    //                         &row,
-    //                         &Arc::new(schema.clone().into()),
-    //                         self.ctx.clone(),
-    //                         &self.name_resolution_ctx,
-    //                         self.metadata.clone(),
-    //                     )
-    //                     .await?;
-    //                 new_rows.push(new_row);
-    //             }
-    //             Ok(InsertInputSource::Values(InsertValue::Values {
-    //                 rows: new_rows,
-    //             }))
-    //         }
-    //         InsertSource::RawValues { rest_str, start } => {
-    //             let values_str = rest_str.trim_end_matches(';').trim_start().to_owned();
-    //             match self.ctx.get_stage_attachment() {
-    //                 Some(attachment) => {
-    //                     return self
-    //                         .bind_copy_from_attachment(
-    //                             bind_context,
-    //                             attachment,
-    //                             catalog_name,
-    //                             database_name,
-    //                             table_name,
-    //                             Arc::new(schema.into()),
-    //                             &values_str,
-    //                             CopyIntoTableMode::Insert {
-    //                                 overwrite: *overwrite,
-    //                             },
-    //                         )
-    //                         .await;
-    //                 }
-    //                 None => Ok(InsertInputSource::Values(InsertValue::RawValues {
-    //                     data: rest_str,
-    //                     start,
-    //                 })),
-    //             }
-    //         }
-    //         InsertSource::Select { query } => {
-    //             let statement = Statement::Query(query);
-    //             let select_plan = self.bind_statement(bind_context, &statement).await?;
-    //             Ok(InsertInputSource::SelectPlan(Box::new(select_plan)))
-    //         }
-    //     };
-
-    //     let plan = Insert {
-    //         catalog: catalog_name.to_string(),
-    //         database: database_name.to_string(),
-    //         table: table_name,
-    //         schema,
-    //         overwrite: *overwrite,
-    //         source: input_source?,
-    //         table_info: None,
-    //     };
-
-    //     Ok(Plan::Insert(Box::new(plan)))
-    // }
-
     #[async_backtrace::framed]
-    pub(in crate::planner::binder) async fn bind_insert_to_copy(
+    pub(in crate::planner::binder) async fn bind_insert(
         &mut self,
         bind_context: &mut BindContext,
         stmt: &InsertStmt,
@@ -183,6 +84,7 @@ impl Binder {
             overwrite,
             ..
         } = stmt;
+        self.init_cte(bind_context, with)?;
 
         let table_identifier = TableIdentifier::new(self, catalog, database, table, &None);
@@ -198,23 +100,91 @@ impl Binder {
             .await
             .map_err(|err| table_identifier.not_found_suggest_error(err))?;
 
-        let schema: Arc<DataSchema> =
-            Arc::new(self.schema_project(&table.schema(), columns)?.into());
+        let schema = self.schema_project(&table.schema(), columns)?;
+        let schema: DataSchemaRef = Arc::new(schema.into());
+
+        let (source, project_columns) = match source.clone() {
+            InsertSource::Values { rows } => {
+                let mut new_rows = Vec::with_capacity(rows.len());
+                for row in rows {
+                    let new_row = bind_context
+                        .exprs_to_scalar(
+                            &row,
+                            &schema,
+                            self.ctx.clone(),
+                            &self.name_resolution_ctx,
+                            self.metadata.clone(),
+                        )
+                        .await?;
+                    new_rows.push(new_row);
+                }
+                (
+                    SExpr::create_leaf(Arc::new(
+                        ValueScan {
+                            values: Values::Values(Arc::new(new_rows)),
+                            dest_schema: schema.clone(),
+                        }
+                        .into(),
+                    )),
+                    None,
+                )
+            }
+            InsertSource::RawValues { rest_str, start } => {
+                let values_str = rest_str.trim_end_matches(';').trim_start().to_owned();
+                match self.ctx.get_stage_attachment() {
+                    Some(attachment) => {
+                        return self
+                            .bind_copy_from_attachment(
+                                bind_context,
+                                attachment,
+                                catalog_name,
+                                database_name,
+                                table_name,
+                                schema,
+                                &values_str,
+                            )
+                            .await;
+                    }
+                    None => (
+                        SExpr::create_leaf(Arc::new(
+                            ValueScan {
+                                values: Values::RawValues {
+                                    rest_str: Arc::new(values_str),
+                                    start,
+                                },
+                                dest_schema: schema.clone(),
+                            }
+                            .into(),
+                        )),
+                        None,
+                    ),
+                }
+            }
+            InsertSource::Select { query } => {
+                let (source, bind_context) = self.bind_query(bind_context, &query)?;
+                (source, Some(bind_context.columns.clone()))
+            }
+        };
 
-        // let plan = CopyIntoTablePlan {
-        //     no_file_to_copy: false,
-        //     catalog_name,
-        //     database_name,
-        //     table_name,
-        //     required_values_schema: schema.clone(),
-        //     values_consts: todo!(),
-        //     required_source_schema: schema,
-        //     force: todo!(),
-        //     enable_distributed: false,
-        //     source: todo!(),
-        // };
+        let copy_into = CopyIntoTablePlan {
+            catalog_name,
+            database_name,
+            table_name,
+            required_values_schema: schema.clone(),
+            values_consts: vec![],
+            required_source_schema: schema,
+            mutation_kind: MutationKind::Insert,
+            project_columns,
+        };
 
-        // Ok(Plan::CopyIntoTable(Box::new(plan)))
-        todo!()
+        Ok(Plan::CopyIntoTable {
+            s_expr: Box::new(SExpr::create_unary(
+                Arc::new(copy_into.into()),
+                Arc::new(source),
+            )),
+            metadata: self.metadata.clone(),
+            stage_context: None,
+            overwrite: *overwrite,
+        })
     }
 }
diff --git a/src/query/sql/src/planner/binder/replace.rs b/src/query/sql/src/planner/binder/replace.rs
index c467ea8a6d1c3..5aa37952b2a91 100644
--- a/src/query/sql/src/planner/binder/replace.rs
+++ b/src/query/sql/src/planner/binder/replace.rs
@@ -22,7 +22,6 @@ use databend_common_exception::Result;
 
 use crate::binder::Binder;
 use crate::normalize_identifier;
-use crate::plans::CopyIntoTableMode;
 use crate::plans::InsertInputSource;
 use crate::plans::InsertValue;
 use crate::plans::Plan;
@@ -121,7 +120,6 @@ impl Binder {
                         table_name.clone(),
                         Arc::new(schema.clone().into()),
                         &values_str,
-                        CopyIntoTableMode::Replace,
                     )
                     .await?;
                 Ok(InsertInputSource::Stage(Box::new(plan)))
diff --git a/src/query/sql/src/planner/binder/util.rs b/src/query/sql/src/planner/binder/util.rs
index f082224649b4f..2504c405cd029 100644
--- a/src/query/sql/src/planner/binder/util.rs
+++ b/src/query/sql/src/planner/binder/util.rs
@@ -88,6 +88,8 @@ impl Binder {
                 | RelOperator::Mutation(_)
                 | RelOperator::Recluster(_)
                | RelOperator::MutationSource(_)
+                | RelOperator::CopyIntoTable(_)
+                | RelOperator::ValueScan(_)
                 | RelOperator::CompactBlock(_) => {
                     return Err(ErrorCode::SyntaxException(format!(
                         "{:?} is not allowed in recursive cte",
diff --git a/src/query/sql/src/planner/optimizer/decorrelate/subquery_rewriter.rs b/src/query/sql/src/planner/optimizer/decorrelate/subquery_rewriter.rs
index d973b91a69447..065dd3bc4e61c 100644
--- a/src/query/sql/src/planner/optimizer/decorrelate/subquery_rewriter.rs
+++ b/src/query/sql/src/planner/optimizer/decorrelate/subquery_rewriter.rs
@@ -197,6 +197,8 @@ impl SubqueryRewriter {
             | RelOperator::Mutation(_)
             | RelOperator::MutationSource(_)
             | RelOperator::Recluster(_)
+            | RelOperator::CopyIntoTable(_)
+            | RelOperator::ValueScan(_)
             | RelOperator::CompactBlock(_) => Ok(s_expr.clone()),
         }
     }
diff --git a/src/query/sql/src/planner/optimizer/dynamic_sample/dynamic_sample.rs b/src/query/sql/src/planner/optimizer/dynamic_sample/dynamic_sample.rs
index a2363d69029b1..38600bb26f2f8 100644
--- a/src/query/sql/src/planner/optimizer/dynamic_sample/dynamic_sample.rs
+++ b/src/query/sql/src/planner/optimizer/dynamic_sample/dynamic_sample.rs
@@ -94,6 +94,8 @@ pub async fn dynamic_sample(
         | RelOperator::Mutation(_)
         | RelOperator::Recluster(_)
         | RelOperator::CompactBlock(_)
+        | RelOperator::CopyIntoTable(_)
+        | RelOperator::ValueScan(_)
         | RelOperator::MutationSource(_) => {
             s_expr.plan().derive_stats(&RelExpr::with_s_expr(s_expr))
         }
diff --git a/src/query/sql/src/planner/optimizer/format.rs b/src/query/sql/src/planner/optimizer/format.rs
index b72f217ae4cbb..d3705365f16b6 100644
--- a/src/query/sql/src/planner/optimizer/format.rs
+++ b/src/query/sql/src/planner/optimizer/format.rs
@@ -77,6 +77,8 @@ pub fn display_rel_op(rel_op: &RelOperator) -> String {
         RelOperator::MutationSource(_) => "MutationSource".to_string(),
         RelOperator::Recluster(_) => "Recluster".to_string(),
         RelOperator::CompactBlock(_) => "CompactBlock".to_string(),
+        RelOperator::CopyIntoTable(_) => "CopyIntoTable".to_string(),
+        RelOperator::ValueScan(_) => "ValueScan".to_string(),
     }
 }
diff --git a/src/query/sql/src/planner/optimizer/hyper_dp/dphyp.rs b/src/query/sql/src/planner/optimizer/hyper_dp/dphyp.rs
index db2e0d8699b4c..3c6e17ea894d4 100644
--- a/src/query/sql/src/planner/optimizer/hyper_dp/dphyp.rs
+++ b/src/query/sql/src/planner/optimizer/hyper_dp/dphyp.rs
@@ -296,7 +296,9 @@ impl DPhpy {
             | RelOperator::Mutation(_)
             | RelOperator::MutationSource(_)
             | RelOperator::Recluster(_)
-            | RelOperator::CompactBlock(_) => Ok((Arc::new(s_expr.clone()), true)),
+            | RelOperator::CompactBlock(_)
+            | RelOperator::CopyIntoTable(_)
+            | RelOperator::ValueScan(_) => Ok((Arc::new(s_expr.clone()), true)),
         }
     }
diff --git a/src/query/sql/src/planner/optimizer/optimizer.rs b/src/query/sql/src/planner/optimizer/optimizer.rs
index f20ef7dc27eb9..00ecfc9825143 100644
--- a/src/query/sql/src/planner/optimizer/optimizer.rs
+++ b/src/query/sql/src/planner/optimizer/optimizer.rs
@@ -47,6 +47,7 @@ use crate::optimizer::SExpr;
 use crate::optimizer::DEFAULT_REWRITE_RULES;
 use crate::planner::query_executor::QueryExecutor;
 use crate::plans::CopyIntoLocationPlan;
+use crate::plans::Exchange;
 use crate::plans::Join;
 use crate::plans::JoinType;
 use crate::plans::MatchedEvaluator;
@@ -286,40 +287,47 @@ pub async fn optimize(mut opt_ctx: OptimizerContext, plan: Plan) -> Result<Plan>
             from: Box::new(Box::pin(optimize(opt_ctx, *from)).await?),
             options,
         })),
-        // Plan::CopyIntoTable(mut plan) if !plan.no_file_to_copy => {
-        //     plan.enable_distributed = opt_ctx.enable_distributed_optimization
-        //         && opt_ctx
-        //             .table_ctx
-        //             .get_settings()
-        //             .get_enable_distributed_copy()?;
-        //     info!(
-        //         "after optimization enable_distributed_copy? : {}",
-        //         plan.enable_distributed
-        //     );
-
-        //     // if let Some(p) = &plan.query {
-        //     //     let optimized_plan = optimize(opt_ctx.clone(), *p.clone()).await?;
-        //     //     plan.query = Some(Box::new(optimized_plan));
-        //     // }
-        //     Ok(Plan::CopyIntoTable(plan))
-        // }
+        Plan::CopyIntoTable {
+            s_expr,
+            metadata,
+            stage_context,
+            overwrite,
+        } => {
+            let enable_distributed = opt_ctx.enable_distributed_optimization
+                && opt_ctx
+                    .table_ctx
+                    .get_settings()
+                    .get_enable_distributed_copy()?;
+            info!(
+                "after optimization enable_distributed_copy? : {}",
+                enable_distributed
+            );
+            let mut optimized_source =
+                optimize_query(&mut opt_ctx, s_expr.child(0)?.clone()).await?;
+            let optimized = match enable_distributed {
+                true => {
+                    if let RelOperator::Exchange(Exchange::Merge) = optimized_source.plan.as_ref() {
+                        optimized_source = optimized_source.child(0).unwrap().clone();
+                    }
+                    let copy_into = SExpr::create_unary(
+                        Arc::new(s_expr.plan().clone()),
+                        Arc::new(optimized_source),
+                    );
+                    let exchange = Arc::new(RelOperator::Exchange(Exchange::Merge));
+                    SExpr::create_unary(exchange, Arc::new(copy_into))
+                }
+                false => {
+                    SExpr::create_unary(Arc::new(s_expr.plan().clone()), Arc::new(optimized_source))
+                }
+            };
+            Ok(Plan::CopyIntoTable {
+                s_expr: Box::new(optimized),
+                metadata,
+                stage_context,
+                overwrite,
+            })
+        }
         Plan::DataMutation { s_expr, .. } => optimize_mutation(opt_ctx, *s_expr).await,
-
-        // distributed insert will be optimized in `physical_plan_builder`
-        // Plan::Insert(mut plan) => {
-        //     match plan.source {
-        //         InsertInputSource::SelectPlan(p) => {
-        //             let optimized_plan = optimize(opt_ctx.clone(), *p.clone()).await?;
-        //             plan.source = InsertInputSource::SelectPlan(Box::new(optimized_plan));
-        //         }
-        //         InsertInputSource::Stage(p) => {
-        //             let optimized_plan = optimize(opt_ctx.clone(), *p.clone()).await?;
-        //             plan.source = InsertInputSource::Stage(Box::new(optimized_plan));
-        //         }
-        //         _ => {}
-        //     }
-        //     Ok(Plan::Insert(plan))
-        // }
         Plan::InsertMultiTable(mut plan) => {
             plan.input_source = optimize(opt_ctx.clone(), plan.input_source.clone()).await?;
             Ok(Plan::InsertMultiTable(plan))
diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_semi_to_inner_join.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_semi_to_inner_join.rs
index 29dfe4872f608..a0c8420652ee1 100644
--- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_semi_to_inner_join.rs
+++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_semi_to_inner_join.rs
@@ -142,7 +142,9 @@ fn find_group_by_keys(child: &SExpr, group_by_keys: &mut HashSet<IndexType>) ->
         | RelOperator::Mutation(_)
         | RelOperator::MutationSource(_)
        | RelOperator::Recluster(_)
-        | RelOperator::CompactBlock(_) => {}
+        | RelOperator::CompactBlock(_)
+        | RelOperator::CopyIntoTable(_)
+        | RelOperator::ValueScan(_) => {}
     }
     Ok(())
 }
diff --git a/src/query/sql/src/planner/optimizer/s_expr.rs b/src/query/sql/src/planner/optimizer/s_expr.rs
index d56f921e50536..6d42d8ba818e0 100644
--- a/src/query/sql/src/planner/optimizer/s_expr.rs
+++ b/src/query/sql/src/planner/optimizer/s_expr.rs
@@ -340,6 +340,8 @@ impl SExpr {
                 | RelOperator::RecursiveCteScan(_)
                 | RelOperator::Mutation(_)
                 | RelOperator::Recluster(_)
+                | RelOperator::CopyIntoTable(_)
+                | RelOperator::ValueScan(_)
                 | RelOperator::CompactBlock(_) => {}
             };
             for child in &self.children {
@@ -441,6 +443,8 @@ fn find_subquery(rel_op: &RelOperator) -> bool {
        | RelOperator::RecursiveCteScan(_)
         | RelOperator::Mutation(_)
         | RelOperator::Recluster(_)
+        | RelOperator::CopyIntoTable(_)
+        | RelOperator::ValueScan(_)
         | RelOperator::CompactBlock(_) => false,
         RelOperator::Join(op) => {
             op.equi_conditions.iter().any(|condition| {
diff --git a/src/query/sql/src/planner/plans/copy_into_table.rs b/src/query/sql/src/planner/plans/copy_into_table.rs
index 9196ede8ffceb..4fbe1d7c7ceba 100644
--- a/src/query/sql/src/planner/plans/copy_into_table.rs
+++ b/src/query/sql/src/planner/plans/copy_into_table.rs
@@ -13,9 +13,8 @@
 // limitations under the License.
 
 use std::fmt::Debug;
-use std::fmt::Display;
 use std::fmt::Formatter;
-use std::str::FromStr;
+use std::hash::Hash;
 use std::sync::Arc;
 use std::time::Instant;
 
@@ -31,91 +30,24 @@ use databend_common_expression::DataSchema;
 use databend_common_expression::DataSchemaRef;
 use databend_common_expression::DataSchemaRefExt;
 use databend_common_expression::Scalar;
+use databend_common_meta_app::principal::StageInfo;
 use databend_common_meta_app::principal::COPY_MAX_FILES_COMMIT_MSG;
 use databend_common_meta_app::principal::COPY_MAX_FILES_PER_COMMIT;
 use databend_common_metrics::storage::*;
 use databend_common_storage::init_stage_operator;
 use databend_common_storage::StageFileInfo;
-use enum_as_inner::EnumAsInner;
 use log::info;
 
-use super::InsertValue;
-use crate::plans::Plan;
+use super::Operator;
+use super::RelOp;
+use crate::executor::physical_plans::CopyIntoTable;
+use crate::executor::physical_plans::MutationKind;
+use crate::executor::PhysicalPlan;
+use crate::executor::PhysicalPlanBuilder;
+use crate::optimizer::SExpr;
+use crate::ColumnBinding;
 
-#[derive(PartialEq, Eq, Clone, Debug, serde::Serialize, serde::Deserialize)]
-pub enum ValidationMode {
-    None,
-    ReturnNRows(u64),
-    ReturnErrors,
-    ReturnAllErrors,
-}
-
-impl Display for ValidationMode {
-    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
-        match self {
-            ValidationMode::None => write!(f, ""),
-            ValidationMode::ReturnNRows(v) => write!(f, "RETURN_ROWS={v}"),
-            ValidationMode::ReturnErrors => write!(f, "RETURN_ERRORS"),
-            ValidationMode::ReturnAllErrors => write!(f, "RETURN_ALL_ERRORS"),
-        }
-    }
-}
-
-impl FromStr for ValidationMode {
-    type Err = String;
-    fn from_str(s: &str) -> std::result::Result<Self, String> {
-        match s.to_uppercase().as_str() {
-            "" => Ok(ValidationMode::None),
-            "RETURN_ERRORS" => Ok(ValidationMode::ReturnErrors),
-            "RETURN_ALL_ERRORS" => Ok(ValidationMode::ReturnAllErrors),
-            v => {
-                let rows_str = v.replace("RETURN_", "").replace("_ROWS", "");
-                let rows = rows_str.parse::<u64>();
-                match rows {
-                    Ok(v) => Ok(ValidationMode::ReturnNRows(v)),
-                    Err(_) => Err(format!(
-                        "Unknown validation mode:{v:?}, must one of {{ RETURN_<n>_ROWS | RETURN_ERRORS | RETURN_ALL_ERRORS}}"
-                    )),
-                }
-            }
-        }
-    }
-}
-
-#[derive(Clone, Copy, Eq, PartialEq, Debug, serde::Serialize, serde::Deserialize)]
-pub enum CopyIntoTableMode {
-    Insert { overwrite: bool },
-    Replace,
-    Copy,
-}
-
-impl Display for CopyIntoTableMode {
-    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
-        match self {
-            CopyIntoTableMode::Insert { overwrite } => {
-                if *overwrite {
-                    write!(f, "INSERT OVERWRITE")
-                } else {
-                    write!(f, "INSERT")
-                }
-            }
-            CopyIntoTableMode::Replace => write!(f, "REPLACE"),
-            CopyIntoTableMode::Copy => write!(f, "COPY"),
-        }
-    }
-}
-
-impl CopyIntoTableMode {
-    pub fn is_overwrite(&self) -> bool {
-        match self {
-            CopyIntoTableMode::Insert { overwrite } => *overwrite,
-            CopyIntoTableMode::Replace => false,
-            CopyIntoTableMode::Copy => false,
-        }
-    }
-}
-
-#[derive(Clone)]
+#[derive(Clone, PartialEq, Eq)]
 pub struct CopyIntoTablePlan {
     pub catalog_name: String,
     pub database_name: String,
@@ -123,150 +55,142 @@ pub struct CopyIntoTablePlan {
     pub required_values_schema: DataSchemaRef,
     pub values_consts: Vec<Scalar>,
     pub required_source_schema: DataSchemaRef,
-    pub force: bool,
+    pub project_columns: Option<Vec<ColumnBinding>>,
+    pub mutation_kind: MutationKind,
 }
 
-#[derive(Clone, EnumAsInner)]
-pub enum AppendSource {
-    Query(Box<Plan>),
-    Stage(Box<StageTableInfo>),
-    Values(InsertValue),
+#[derive(Clone, Debug)]
+pub struct StageContext {
+    pub purge: bool,
+    pub force: bool,
+    pub files_to_copy: Vec<StageFileInfo>,
+    pub duplicated_files_detected: Vec<String>,
+    pub stage_info: StageInfo,
 }
 
-impl AppendSource {
-    pub fn files_to_copy(&self) -> Vec<StageFileInfo> {
-        match self {
-            AppendSource::Stage(stage) => stage.files_to_copy.clone().unwrap_or_default(),
-            _ => vec![],
-        }
-    }
-
-    pub fn duplicated_files_detected(&self) -> Vec<String> {
-        match self {
-            AppendSource::Stage(stage) => stage.duplicated_files_detected.clone(),
-            _ => vec![],
-        }
+impl Hash for CopyIntoTablePlan {
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        self.catalog_name.hash(state);
+        self.database_name.hash(state);
+        self.table_name.hash(state);
     }
 }
 
 impl CopyIntoTablePlan {
-    pub async fn collect_files(&mut self, ctx: &dyn TableContext) -> Result<()> {
-        // ctx.set_status_info("begin to list files");
-        // let start = Instant::now();
-
-        // let stage_table_info = self.source.as_stage().unwrap();
-        // let max_files = stage_table_info.stage_info.copy_options.max_files;
-        // let max_files = if max_files == 0 {
-        //     None
-        // } else {
-        //     Some(max_files)
-        // };
-
-        // let thread_num = ctx.get_settings().get_max_threads()? as usize;
-        // let operator = init_stage_operator(&stage_table_info.stage_info)?;
-        // let all_source_file_infos = if operator.info().native_capability().blocking {
-        //     if self.force {
-        //         stage_table_info
-        //             .files_info
-        //             .blocking_list(&operator, max_files)
-        //     } else {
-        //         stage_table_info.files_info.blocking_list(&operator, None)
-        //     }
-        // } else if self.force {
-        //     stage_table_info
-        //         .files_info
-        //         .list(&operator, thread_num, max_files)
-        //         .await
-        // } else {
-        //     stage_table_info
-        //         .files_info
-        //         .list(&operator, thread_num, None)
-        //         .await
-        // }?;
-
-        // let num_all_files = all_source_file_infos.len();
-
-        // let end_get_all_source = Instant::now();
-        // let cost_get_all_files = end_get_all_source.duration_since(start).as_millis();
-        // metrics_inc_copy_collect_files_get_all_source_files_milliseconds(cost_get_all_files as u64);
-
-        // ctx.set_status_info(&format!(
-        //     "end list files: got {} files, time used {:?}",
-        //     num_all_files,
-        //     start.elapsed()
-        // ));
-
-        // let (need_copy_file_infos, duplicated) = if self.force {
-        //     if !stage_table_info.stage_info.copy_options.purge
-        //         && all_source_file_infos.len() > COPY_MAX_FILES_PER_COMMIT
-        //     {
-        //         return Err(ErrorCode::Internal(COPY_MAX_FILES_COMMIT_MSG));
-        //     }
-        //     info!(
-        //         "force mode, ignore file filtering. ({}.{})",
-        //         &self.database_name, &self.table_name
-        //     );
-        //     (all_source_file_infos, vec![])
-        // } else {
-        //     // Status.
-        //     ctx.set_status_info("begin filtering out copied files");
-
-        //     let filter_start = Instant::now();
-        //     let FilteredCopyFiles {
-        //         files_to_copy,
-        //         duplicated_files,
-        //     } = ctx
-        //         .filter_out_copied_files(
-        //             &self.catalog_name,
-        //             &self.database_name,
-        //             &self.table_name,
-        //             &all_source_file_infos,
-        //             max_files,
-        //         )
-        //         .await?;
-        //     ctx.set_status_info(&format!(
-        //         "end filtering out copied files: {}, time used {:?}",
-        //         num_all_files,
-        //         filter_start.elapsed()
-        //     ));
-
-        //     let end_filter_out = Instant::now();
-        //     let cost_filter_out = end_filter_out
-        //         .duration_since(end_get_all_source)
-        //         .as_millis();
-        //     metrics_inc_copy_filter_out_copied_files_entire_milliseconds(cost_filter_out as u64);
-
-        //     (files_to_copy, duplicated_files)
-        // };
-
-        // let num_copied_files = need_copy_file_infos.len();
-        // let copied_bytes: u64 = need_copy_file_infos.iter().map(|i| i.size).sum();
-
-        // info!(
-        //     "collect files with max_files={:?} finished, need to copy {} files, {} bytes; skip {} duplicated files, time used:{:?}",
-        //     max_files,
-        //     need_copy_file_infos.len(),
-        //     copied_bytes,
-        //     num_all_files - num_copied_files,
-        //     start.elapsed()
-        // );
-
-        // if need_copy_file_infos.is_empty() {
-        //     self.no_file_to_copy = true;
-        // }
-
-        // let stage_table_info = self.source.as_stage_mut().unwrap();
-
-        // stage_table_info.files_to_copy = Some(need_copy_file_infos);
-        // stage_table_info.duplicated_files_detected = duplicated;
-
-        // Ok(())
-        todo!()
+    pub async fn collect_files(
+        &self,
+        ctx: &dyn TableContext,
+        stage_table_info: &mut StageTableInfo,
+        force: bool,
+    ) -> Result<()> {
+        ctx.set_status_info("begin to list files");
+        let start = Instant::now();
+
+        let max_files = stage_table_info.stage_info.copy_options.max_files;
+        let max_files = if max_files == 0 {
+            None
+        } else {
+            Some(max_files)
+        };
+
+        let thread_num = ctx.get_settings().get_max_threads()? as usize;
+        let operator = init_stage_operator(&stage_table_info.stage_info)?;
+        let all_source_file_infos = if operator.info().native_capability().blocking {
+            if force {
+                stage_table_info
+                    .files_info
+                    .blocking_list(&operator, max_files)
+            } else {
+                stage_table_info.files_info.blocking_list(&operator, None)
+            }
+        } else if force {
+            stage_table_info
+                .files_info
+                .list(&operator, thread_num, max_files)
+                .await
+        } else {
+            stage_table_info
+                .files_info
+                .list(&operator, thread_num, None)
+                .await
+        }?;
+
+        let num_all_files = all_source_file_infos.len();
+
+        let end_get_all_source = Instant::now();
+        let cost_get_all_files = end_get_all_source.duration_since(start).as_millis();
+        metrics_inc_copy_collect_files_get_all_source_files_milliseconds(cost_get_all_files as u64);
+
+        ctx.set_status_info(&format!(
+            "end list files: got {} files, time used {:?}",
+            num_all_files,
+            start.elapsed()
+        ));
+
+        let (need_copy_file_infos, duplicated) = if force {
+            if !stage_table_info.stage_info.copy_options.purge
+                && all_source_file_infos.len() > COPY_MAX_FILES_PER_COMMIT
+            {
+                return Err(ErrorCode::Internal(COPY_MAX_FILES_COMMIT_MSG));
+            }
+            info!(
+                "force mode, ignore file filtering. ({}.{})",
+                &self.database_name, &self.table_name
+            );
+            (all_source_file_infos, vec![])
+        } else {
+            // Status.
+            ctx.set_status_info("begin filtering out copied files");
+
+            let filter_start = Instant::now();
+            let FilteredCopyFiles {
+                files_to_copy,
+                duplicated_files,
+            } = ctx
+                .filter_out_copied_files(
+                    &self.catalog_name,
+                    &self.database_name,
+                    &self.table_name,
+                    &all_source_file_infos,
+                    max_files,
+                )
+                .await?;
+            ctx.set_status_info(&format!(
+                "end filtering out copied files: {}, time used {:?}",
+                num_all_files,
+                filter_start.elapsed()
+            ));
+
+            let end_filter_out = Instant::now();
+            let cost_filter_out = end_filter_out
+                .duration_since(end_get_all_source)
+                .as_millis();
+            metrics_inc_copy_filter_out_copied_files_entire_milliseconds(cost_filter_out as u64);
+
+            (files_to_copy, duplicated_files)
+        };
+
+        let num_copied_files = need_copy_file_infos.len();
+        let copied_bytes: u64 = need_copy_file_infos.iter().map(|i| i.size).sum();
+
+        info!(
+            "collect files with max_files={:?} finished, need to copy {} files, {} bytes; skip {} duplicated files, time used:{:?}",
+            max_files,
+            need_copy_file_infos.len(),
+            copied_bytes,
+            num_all_files - num_copied_files,
+            start.elapsed()
+        );
+
+        stage_table_info.files_to_copy = Some(need_copy_file_infos);
+        stage_table_info.duplicated_files_detected = duplicated;
+
+        Ok(())
     }
 }
 
 impl Debug for CopyIntoTablePlan {
-    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
+    fn fmt(&self, _f: &mut Formatter) -> std::fmt::Result {
         // let CopyIntoTablePlan {
         //     catalog_name: catalog_info,
         //     database_name,
@@ -294,8 +218,6 @@ impl Debug for CopyIntoTablePlan {
     }
 }
 
-/// CopyPlan supports CopyIntoTable & CopyIntoStage
-
 impl CopyIntoTablePlan {
     fn copy_into_table_schema() -> DataSchemaRef {
         DataSchemaRefExt::create(vec![
@@ -314,11 +236,41 @@ impl CopyIntoTablePlan {
     }
 
     pub fn schema(&self) -> DataSchemaRef {
-        // if self.from_attachment {
-        //     Arc::new(DataSchema::empty())
-        // } else {
-        //     Self::copy_into_table_schema()
-        // }
-        todo!()
+        match self.mutation_kind {
+            MutationKind::CopyInto => Self::copy_into_table_schema(),
+            MutationKind::Insert => Arc::new(DataSchema::empty()),
+            _ => unreachable!(),
+        }
+    }
+}
+
+impl Operator for CopyIntoTablePlan {
+    fn rel_op(&self) -> RelOp {
+        RelOp::CopyIntoTable
+    }
+}
+
+impl PhysicalPlanBuilder {
+    pub async fn build_copy_into_table(
+        &mut self,
+        s_expr: &SExpr,
+        plan: &crate::plans::CopyIntoTablePlan,
+    ) -> Result<PhysicalPlan> {
+        let to_table = self
+            .ctx
+            .get_table(&plan.catalog_name, &plan.database_name, &plan.table_name)
+            .await?;
+
+        let source = self.build(s_expr.child(0)?, Default::default()).await?;
+
+        Ok(PhysicalPlan::CopyIntoTable(Box::new(CopyIntoTable {
+            plan_id: 0,
+            input: Box::new(source),
+            required_values_schema: plan.required_values_schema.clone(),
+            values_consts: plan.values_consts.clone(),
+            required_source_schema: plan.required_source_schema.clone(),
+            table_info: to_table.get_table_info().clone(),
+            project_columns: None,
+        })))
    }
 }
diff --git a/src/query/sql/src/planner/plans/insert.rs b/src/query/sql/src/planner/plans/insert.rs
index 37ae25f41240b..b04c639464623 100644
--- a/src/query/sql/src/planner/plans/insert.rs
+++ b/src/query/sql/src/planner/plans/insert.rs
@@ -118,89 +118,90 @@ impl Insert {
 }
 
 pub(crate) fn format_insert_source(
-    plan_name: &str,
-    source: &InsertInputSource,
-    verbose: bool,
-    mut children: Vec<FormatTreeNode>,
+    _plan_name: &str,
+    _source: &InsertInputSource,
+    _verbose: bool,
+    _children: Vec<FormatTreeNode>,
 ) -> databend_common_exception::Result<String> {
-    match source {
-        InsertInputSource::SelectPlan(plan) => {
-            if let Plan::Query {
-                s_expr, metadata, ..
-            } = &**plan
-            {
-                let metadata = &*metadata.read();
-                let sub_tree = s_expr.to_format_tree(metadata, verbose)?;
-                children.push(sub_tree);
-
-                return Ok(FormatTreeNode::with_children(
-                    format!("{plan_name} (subquery):"),
-                    children,
-                )
-                .format_pretty()?);
-            }
-            Ok(String::new())
-        }
-        InsertInputSource::Values(values) => match values {
-            InsertValue::Values { .. } => Ok(FormatTreeNode::with_children(
-                format!("{plan_name} (values):"),
-                children,
-            )
-            .format_pretty()?),
-            InsertValue::RawValues { .. } => Ok(FormatTreeNode::with_children(
-                format!("{plan_name} (rawvalues):"),
-                children,
-            )
-            .format_pretty()?),
-        },
-        _ => todo!()
-        // InsertInputSource::Stage(plan) => match *plan.clone() {
-        //     Plan::CopyIntoTable(copy_plan) => {
-        //         let CopyIntoTablePlan {
-        //             no_file_to_copy,
-        //             required_values_schema,
-        //             required_source_schema,
-        //             force,
-        //             source,
-        //             enable_distributed,
-        //             ..
-        //         } = &*copy_plan;
-        //         let required_values_schema = required_values_schema
-        //             .fields()
-        //             .iter()
-        //             .map(|field| field.name().to_string())
-        //             .collect::<Vec<_>>()
-        //             .join(",");
-        //         let required_source_schema = required_source_schema
-        //             .fields()
-        //             .iter()
-        //             .map(|field| field.name().to_string())
-        //             .collect::<Vec<_>>()
-        //             .join(",");
-        //         let stage_node = vec![
-        //             FormatTreeNode::new(format!("no_file_to_copy: {no_file_to_copy}")),
-        //             FormatTreeNode::new(format!("from_attachment: {from_attachment}")),
-        //             FormatTreeNode::new(format!(
-        //                 "required_values_schema: [{required_values_schema}]"
-        //             )),
-        //             FormatTreeNode::new(format!(
-        //                 "required_source_schema: [{required_source_schema}]"
-        //             )),
-        //             FormatTreeNode::new(format!("write_mode: {write_mode}")),
-        //             FormatTreeNode::new(format!("validation_mode: {validation_mode}")),
-        //             FormatTreeNode::new(format!("force: {force}")),
-        //             FormatTreeNode::new(format!("stage_table_info: {stage_table_info}")),
-        //             FormatTreeNode::new(format!("enable_distributed: {enable_distributed}")),
-        //         ];
-        //         children.extend(stage_node);
-        //         Ok(
-        //             FormatTreeNode::with_children(format!("{plan_name} (stage):"), children)
-        //                 .format_pretty()?,
-        //         )
-        //     }
-        //     _ => unreachable!("plan in InsertInputSource::Stag must be CopyIntoTable"),
-        // },
-    }
+    // match source {
+    //     InsertInputSource::SelectPlan(plan) => {
+    //         if let Plan::Query {
+    //             s_expr, metadata, ..
+    //         } = &**plan
+    //         {
+    //             let metadata = &*metadata.read();
+    //             let sub_tree = s_expr.to_format_tree(metadata, verbose)?;
+    //             children.push(sub_tree);
+
+    //             return Ok(FormatTreeNode::with_children(
+    //                 format!("{plan_name} (subquery):"),
+    //                 children,
+    //             )
+    //             .format_pretty()?);
+    //         }
+    //         Ok(String::new())
+    //     }
+    //     InsertInputSource::Values(values) => match values {
+    //         InsertValue::Values { .. } => Ok(FormatTreeNode::with_children(
+    //             format!("{plan_name} (values):"),
+    //             children,
+    //         )
+    //         .format_pretty()?),
+    //         InsertValue::RawValues { .. } => Ok(FormatTreeNode::with_children(
+    //             format!("{plan_name} (rawvalues):"),
+    //             children,
+    //         )
+    //         .format_pretty()?),
+    //     },
+    //     InsertInputSource::Stage(plan) => match *plan.clone() {
+    //         Plan::CopyIntoTable(copy_plan) => {
+    //             let CopyIntoTablePlan {
+    //                 no_file_to_copy,
+    //                 from_attachment,
+    //                 required_values_schema,
+    //                 required_source_schema,
+    //                 write_mode,
+    //                 validation_mode,
+    //                 stage_table_info,
+    //                 enable_distributed,
+    //                 ..
+    //             } = &*copy_plan;
+    //             let required_values_schema = required_values_schema
+    //                 .fields()
+    //                 .iter()
+    //                 .map(|field| field.name().to_string())
+    //                 .collect::<Vec<_>>()
+    //                 .join(",");
+    //             let required_source_schema = required_source_schema
+    //                 .fields()
+    //                 .iter()
+    //                 .map(|field| field.name().to_string())
+    //                 .collect::<Vec<_>>()
+    //                 .join(",");
+    //             let stage_node = vec![
+    //                 FormatTreeNode::new(format!("no_file_to_copy: {no_file_to_copy}")),
+    //                 FormatTreeNode::new(format!("from_attachment: {from_attachment}")),
+    //                 FormatTreeNode::new(format!(
+    //                     "required_values_schema: [{required_values_schema}]"
+    //                 )),
+    //                 FormatTreeNode::new(format!(
+    //                     "required_source_schema: [{required_source_schema}]"
+    //                 )),
+    //                 FormatTreeNode::new(format!("write_mode: {write_mode}")),
+    //                 FormatTreeNode::new(format!("validation_mode: {validation_mode}")),
+    //                 FormatTreeNode::new(format!("stage_table_info: {stage_table_info}")),
+    //                 FormatTreeNode::new(format!("enable_distributed: {enable_distributed}")),
+    //             ];
+    //             children.extend(stage_node);
+    //             Ok(
+    //                 FormatTreeNode::with_children(format!("{plan_name} (stage):"), children)
+    //                     .format_pretty()?,
+    //             )
+    //         }
+    //         _ => unreachable!("plan in InsertInputSource::Stage must be CopyIntoTable"),
+    //     },
+    // }
    todo!()
 }
 
 impl std::fmt::Debug for Insert {
diff --git a/src/query/sql/src/planner/plans/mod.rs b/src/query/sql/src/planner/plans/mod.rs
index 2acc83d081d7f..4ae14cfb9bce1 100644
--- a/src/query/sql/src/planner/plans/mod.rs
+++ b/src/query/sql/src/planner/plans/mod.rs
@@ -52,6 +52,7 @@ mod sort;
 mod system;
 mod udf;
 mod union_all;
+mod value_scan;
 mod window;
 
 pub use aggregate::*;
@@ -99,4 +100,5 @@ pub use sort::*;
 pub use system::*;
 pub use udf::*;
 pub use union_all::UnionAll;
+pub use value_scan::*;
 pub use window::*;
diff --git a/src/query/sql/src/planner/plans/operator.rs b/src/query/sql/src/planner/plans/operator.rs
index d7f6b20c5be54..15d5a2ed1c52a 100644
--- a/src/query/sql/src/planner/plans/operator.rs
+++ b/src/query/sql/src/planner/plans/operator.rs
@@ -18,7 +18,9 @@ use databend_common_catalog::table_context::TableContext;
 use databend_common_exception::ErrorCode;
 use databend_common_exception::Result;
 
+use super::CopyIntoTablePlan;
 use super::MutationSource;
+use super::ValueScan;
 use crate::optimizer::PhysicalProperty;
 use crate::optimizer::RelExpr;
 use crate::optimizer::RelationalProperty;
@@ -121,6 +123,8 @@ pub enum RelOp {
     Recluster,
     CompactBlock,
     MutationSource,
+    CopyIntoTable,
+    ValueScan,
 
     // Pattern
     Pattern,
@@ -153,6 +157,8 @@ pub enum RelOperator {
     Recluster(Recluster),
     CompactBlock(OptimizeCompactBlock),
     MutationSource(MutationSource),
+    CopyIntoTable(CopyIntoTablePlan),
+    ValueScan(ValueScan),
 }
 
 impl Operator for RelOperator {
@@ -182,6 +188,8 @@
         RelOperator::Recluster(rel_op) => rel_op.rel_op(),
         RelOperator::CompactBlock(rel_op) => rel_op.rel_op(),
         RelOperator::MutationSource(rel_op) => rel_op.rel_op(),
+        RelOperator::CopyIntoTable(rel_op) => rel_op.rel_op(),
+        RelOperator::ValueScan(rel_op) => rel_op.rel_op(),
         }
     }
 
@@ -211,6 +219,8 @@
         RelOperator::Recluster(rel_op) => rel_op.arity(),
         RelOperator::CompactBlock(rel_op) => rel_op.arity(),
         RelOperator::MutationSource(rel_op) => rel_op.arity(),
+        RelOperator::CopyIntoTable(rel_op) => rel_op.arity(),
+        RelOperator::ValueScan(rel_op) => rel_op.arity(),
         }
     }
 
@@ -240,6 +250,8 @@
         RelOperator::Recluster(rel_op) => rel_op.derive_relational_prop(rel_expr),
RelOperator::CompactBlock(rel_op) => rel_op.derive_relational_prop(rel_expr), RelOperator::MutationSource(rel_op) => rel_op.derive_relational_prop(rel_expr), + RelOperator::CopyIntoTable(rel_op) => rel_op.derive_relational_prop(rel_expr), + RelOperator::ValueScan(rel_op) => rel_op.derive_relational_prop(rel_expr), } } @@ -269,6 +281,8 @@ impl Operator for RelOperator { RelOperator::Recluster(rel_op) => rel_op.derive_physical_prop(rel_expr), RelOperator::CompactBlock(rel_op) => rel_op.derive_physical_prop(rel_expr), RelOperator::MutationSource(rel_op) => rel_op.derive_physical_prop(rel_expr), + RelOperator::CopyIntoTable(rel_op) => rel_op.derive_physical_prop(rel_expr), + RelOperator::ValueScan(rel_op) => rel_op.derive_physical_prop(rel_expr), } } @@ -298,6 +312,8 @@ impl Operator for RelOperator { RelOperator::Recluster(rel_op) => rel_op.derive_stats(rel_expr), RelOperator::CompactBlock(rel_op) => rel_op.derive_stats(rel_expr), RelOperator::MutationSource(rel_op) => rel_op.derive_stats(rel_expr), + RelOperator::CopyIntoTable(rel_op) => rel_op.derive_stats(rel_expr), + RelOperator::ValueScan(rel_op) => rel_op.derive_stats(rel_expr), } } @@ -381,6 +397,12 @@ impl Operator for RelOperator { RelOperator::MutationSource(rel_op) => { rel_op.compute_required_prop_child(ctx, rel_expr, child_index, required) } + RelOperator::CopyIntoTable(rel_op) => { + rel_op.compute_required_prop_child(ctx, rel_expr, child_index, required) + } + RelOperator::ValueScan(rel_op) => { + rel_op.compute_required_prop_child(ctx, rel_expr, child_index, required) + } } } @@ -463,6 +485,12 @@ impl Operator for RelOperator { RelOperator::MutationSource(rel_op) => { rel_op.compute_required_prop_children(ctx, rel_expr, required) } + RelOperator::CopyIntoTable(rel_op) => { + rel_op.compute_required_prop_children(ctx, rel_expr, required) + } + RelOperator::ValueScan(rel_op) => { + rel_op.compute_required_prop_children(ctx, rel_expr, required) + } } } } @@ -913,3 +941,43 @@ impl TryFrom for MutationSource { } } } + +impl From for RelOperator { + fn from(v: CopyIntoTablePlan) -> Self { + Self::CopyIntoTable(v) + } +} + +impl TryFrom for CopyIntoTablePlan { + type Error = ErrorCode; + fn try_from(value: RelOperator) -> Result { + if let RelOperator::CopyIntoTable(value) = value { + Ok(value) + } else { + Err(ErrorCode::Internal(format!( + "Cannot downcast {:?} to CopyIntoTable", + value.rel_op() + ))) + } + } +} + +impl From for RelOperator { + fn from(v: ValueScan) -> Self { + Self::ValueScan(v) + } +} + +impl TryFrom for ValueScan { + type Error = ErrorCode; + fn try_from(value: RelOperator) -> Result { + if let RelOperator::ValueScan(value) = value { + Ok(value) + } else { + Err(ErrorCode::Internal(format!( + "Cannot downcast {:?} to ValueScan", + value.rel_op() + ))) + } + } +} diff --git a/src/query/sql/src/planner/plans/plan.rs b/src/query/sql/src/planner/plans/plan.rs index 151e052e00f10..78cbb8e55eba2 100644 --- a/src/query/sql/src/planner/plans/plan.rs +++ b/src/query/sql/src/planner/plans/plan.rs @@ -27,6 +27,7 @@ use databend_common_expression::DataSchemaRefExt; use super::CreateDictionaryPlan; use super::DropDictionaryPlan; use super::ShowCreateDictionaryPlan; +use super::StageContext; use crate::binder::ExplainConfig; use crate::optimizer::SExpr; use crate::plans::copy_into_location::CopyIntoLocationPlan; @@ -42,7 +43,6 @@ use crate::plans::AlterViewPlan; use crate::plans::AlterVirtualColumnPlan; use crate::plans::AnalyzeTablePlan; use crate::plans::CallProcedurePlan; -use crate::plans::CopyIntoTablePlan; use 
crate::plans::CreateCatalogPlan; use crate::plans::CreateConnectionPlan; use crate::plans::CreateDatabasePlan; @@ -103,7 +103,6 @@ use crate::plans::ExecuteTaskPlan; use crate::plans::ExistsTablePlan; use crate::plans::GrantPrivilegePlan; use crate::plans::GrantRolePlan; -use crate::plans::Insert; use crate::plans::InsertMultiTable; use crate::plans::KillPlan; use crate::plans::ModifyTableColumnPlan; @@ -240,6 +239,9 @@ pub enum Plan { CopyIntoTable { s_expr: Box, + metadata: MetadataRef, + stage_context: Option>, + overwrite: bool, }, CopyIntoLocation(CopyIntoLocationPlan), @@ -411,25 +413,20 @@ pub enum RewriteKind { impl Plan { pub fn kind(&self) -> QueryKind { - // match self { - // Plan::Query { .. } => QueryKind::Query, - // Plan::CopyIntoTable(copy_plan) => match copy_plan.write_mode { - // CopyIntoTableMode::Insert { .. } => QueryKind::Insert, - // _ => QueryKind::CopyIntoTable, - // }, - // Plan::Explain { .. } - // | Plan::ExplainAnalyze { .. } - // | Plan::ExplainAst { .. } - // | Plan::ExplainSyntax { .. } => QueryKind::Explain, - // Plan::Insert(_) => QueryKind::Insert, - // Plan::Replace(_) - // | Plan::DataMutation { .. } - // | Plan::OptimizePurge(_) - // | Plan::OptimizeCompactSegment(_) - // | Plan::OptimizeCompactBlock { .. } => QueryKind::Update, - // _ => QueryKind::Other, - // } - todo!() + match self { + Plan::Query { .. } => QueryKind::Query, + Plan::CopyIntoTable { .. } => QueryKind::CopyIntoTable, + Plan::Explain { .. } + | Plan::ExplainAnalyze { .. } + | Plan::ExplainAst { .. } + | Plan::ExplainSyntax { .. } => QueryKind::Explain, + Plan::Replace(_) + | Plan::DataMutation { .. } + | Plan::OptimizePurge(_) + | Plan::OptimizeCompactSegment(_) + | Plan::OptimizeCompactBlock { .. } => QueryKind::Update, + _ => QueryKind::Other, + } } } diff --git a/src/query/sql/src/planner/plans/value_scan.rs b/src/query/sql/src/planner/plans/value_scan.rs new file mode 100644 index 0000000000000..5c3f4d786a302 --- /dev/null +++ b/src/query/sql/src/planner/plans/value_scan.rs @@ -0,0 +1,70 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
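// A usage sketch (illustrative only, not lines from this file): with
// `ValueScan` as a leaf relational operator, the binder can express
// INSERT ... VALUES with the same unary SExpr shape that COPY INTO uses.
// Assuming `value_scan: ValueScan` and an `append` operator for the target
// table, and using the `From` impls added in operator.rs above:
//
//     let source = SExpr::create_leaf(Arc::new(value_scan.into()));
//     let s_expr = SExpr::create_unary(Arc::new(append.into()), Arc::new(source));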
+ +use databend_common_exception::Result; +use databend_common_expression::DataSchemaRef; + +use crate::executor::physical_plans::PhysicalValueScan; +use crate::executor::physical_plans::Values; +use crate::executor::PhysicalPlan; +use crate::executor::PhysicalPlanBuilder; +use crate::plans::Operator; +use crate::plans::RelOp; + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct ValueScan { + pub values: Values, + pub dest_schema: DataSchemaRef, +} + +impl std::hash::Hash for ValueScan { + fn hash(&self, state: &mut H) { + self.values.hash(state); + } +} + +impl Operator for ValueScan { + fn rel_op(&self) -> RelOp { + RelOp::ValueScan + } + + fn arity(&self) -> usize { + 0 + } +} + +impl PhysicalPlanBuilder { + pub async fn build_value_scan( + &mut self, + plan: &crate::plans::ValueScan, + ) -> Result { + match &plan.values { + Values::Values(values) => Ok(PhysicalPlan::ValueScan(Box::new(PhysicalValueScan { + plan_id: 0, + values: Values::Values(values.clone()), + output_schema: plan.dest_schema.clone(), + }))), + Values::RawValues { rest_str, start } => { + Ok(PhysicalPlan::ValueScan(Box::new(PhysicalValueScan { + plan_id: 0, + values: Values::RawValues { + rest_str: rest_str.clone(), + start: *start, + }, + output_schema: plan.dest_schema.clone(), + }))) + } + } + } +} From c85485dc67900bb4911c130db7dd6b20b79b8692 Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Tue, 26 Nov 2024 11:54:52 +0800 Subject: [PATCH 03/22] fix merge --- .../interpreter_copy_into_table.rs | 233 ++---------------- .../src/interpreters/interpreter_factory.rs | 4 +- .../src/interpreters/interpreter_replace.rs | 82 +++--- .../builders/builder_copy_into_table.rs | 2 - .../sql/src/planner/binder/copy_into_table.rs | 34 +-- src/query/sql/src/planner/binder/insert.rs | 2 +- .../sql/src/planner/optimizer/optimizer.rs | 4 +- .../sql/src/planner/plans/copy_into_table.rs | 54 ++-- src/query/sql/src/planner/plans/insert.rs | 137 +++++----- src/query/sql/src/planner/plans/plan.rs | 4 +- 10 files changed, 168 insertions(+), 388 deletions(-) diff --git a/src/query/service/src/interpreters/interpreter_copy_into_table.rs b/src/query/service/src/interpreters/interpreter_copy_into_table.rs index 7cce8e18333c1..88848727f340a 100644 --- a/src/query/service/src/interpreters/interpreter_copy_into_table.rs +++ b/src/query/service/src/interpreters/interpreter_copy_into_table.rs @@ -15,6 +15,7 @@ use std::sync::Arc; use databend_common_catalog::lock::LockTableOption; +use databend_common_catalog::plan::StageTableInfo; use databend_common_exception::Result; use databend_common_expression::types::Int32Type; use databend_common_expression::types::StringType; @@ -24,7 +25,6 @@ use databend_common_expression::SendableDataBlockStream; use databend_common_sql::executor::physical_plans::MutationKind; use databend_common_sql::executor::PhysicalPlanBuilder; use databend_common_sql::optimizer::SExpr; -use databend_common_sql::plans::StageContext; use log::debug; use log::info; @@ -45,7 +45,7 @@ pub struct CopyIntoTableInterpreter { ctx: Arc, s_expr: SExpr, metadata: MetadataRef, - stage_context: Option>, + stage_table_info: Option>, overwrite: bool, } @@ -87,13 +87,15 @@ impl Interpreter for CopyIntoTableInterpreter { ©_into_table.table_name, ) .await?; - let copied_files_meta_req = match &self.stage_context { - Some(stage_context) => PipelineBuilder::build_upsert_copied_files_to_meta_req( + let copied_files_meta_req = match &self.stage_table_info { + Some(stage_table_info) => PipelineBuilder::build_upsert_copied_files_to_meta_req( 
self.ctx.clone(), target_table.as_ref(), - stage_context.purge, - &stage_context.files_to_copy, - stage_context.force, + stage_table_info + .files_to_copy + .as_deref() + .unwrap_or_default(), + &stage_table_info.copy_into_table_options, )?, None => None, }; @@ -110,30 +112,27 @@ impl Interpreter for CopyIntoTableInterpreter { )?; // Purge files on pipeline finished. - if let Some(stage_context) = &self.stage_context { - let StageContext { - purge, - force: _, - files_to_copy, - duplicated_files_detected, - stage_info, - } = stage_context.as_ref(); + if let Some(stage_table_info) = &self.stage_table_info { + let files_to_copy = stage_table_info + .files_to_copy + .as_deref() + .unwrap_or_default(); info!( "set files to be purged, # of copied files: {}, # of duplicated files: {}", files_to_copy.len(), - duplicated_files_detected.len() + stage_table_info.duplicated_files_detected.len() ); let files_to_be_deleted = files_to_copy .iter() - .map(|v| v.path.clone()) - .chain(duplicated_files_detected.clone()) + .map(|f| f.path.clone()) + .chain(stage_table_info.duplicated_files_detected.clone()) .collect::>(); PipelineBuilder::set_purge_files_on_finished( self.ctx.clone(), files_to_be_deleted, - *purge, - stage_info.clone(), + &stage_table_info.copy_into_table_options, + stage_table_info.stage_info.clone(), &mut build_res.main_pipeline, )?; } @@ -159,10 +158,7 @@ impl Interpreter for CopyIntoTableInterpreter { let copy_into_table: CopyIntoTablePlan = self.s_expr.plan().clone().try_into()?; match ©_into_table.mutation_kind { MutationKind::CopyInto => { - let stage_context = self.stage_context.as_ref().unwrap(); - let blocks = self.get_copy_into_table_result( - stage_context.stage_info.copy_options.return_failed_only, - )?; + let blocks = self.get_copy_into_table_result()?; Ok(Box::pin(DataBlockStream::create(None, blocks))) } MutationKind::Insert => Ok(Box::pin(DataBlockStream::create(None, vec![]))), @@ -177,24 +173,24 @@ impl CopyIntoTableInterpreter { ctx: Arc, s_expr: SExpr, metadata: MetadataRef, - stage_context: Option>, + stage_table_info: Option>, overwrite: bool, ) -> Result { Ok(CopyIntoTableInterpreter { ctx, s_expr, metadata, - stage_context, + stage_table_info, overwrite, }) } fn get_copy_into_table_result(&self) -> Result> { let return_all = !self - .plan .stage_table_info - .stage_info - .copy_options + .as_ref() + .unwrap() + .copy_into_table_options .return_failed_only; let cs = self.ctx.get_copy_status(); @@ -233,183 +229,4 @@ impl CopyIntoTableInterpreter { ])]; Ok(blocks) } - - /// Build commit insertion pipeline. - async fn commit_insertion( - &self, - main_pipeline: &mut Pipeline, - plan: &CopyIntoTablePlan, - files_to_copy: Vec, - duplicated_files_detected: Vec, - update_stream_meta: Vec, - deduplicated_label: Option, - ) -> Result<()> { - let ctx = self.ctx.clone(); - let to_table = ctx - .get_table( - plan.catalog_info.catalog_name(), - &plan.database_name, - &plan.table_name, - ) - .await?; - - // Commit. - { - let copied_files_meta_req = PipelineBuilder::build_upsert_copied_files_to_meta_req( - ctx.clone(), - to_table.as_ref(), - &plan.stage_table_info.stage_info, - &files_to_copy, - plan.force, - )?; - - to_table.commit_insertion( - ctx.clone(), - main_pipeline, - copied_files_meta_req, - update_stream_meta, - plan.write_mode.is_overwrite(), - None, - deduplicated_label, - )?; - } - - // Purge files. 
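// (The deleted block below is the old inline purge step; per the hunks above
// it now runs from execute2, and the bare `purge` flag argument became the
// whole options struct — a before/after sketch of the one changed call:
//
//     // before
//     PipelineBuilder::set_purge_files_on_finished(ctx, files, *purge, stage_info, pipeline)?;
//     // after
//     PipelineBuilder::set_purge_files_on_finished(ctx, files, &options, stage_info, pipeline)?;
//
// where `options` is `stage_table_info.copy_into_table_options`.)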
- { - info!( - "set files to be purged, # of copied files: {}, # of duplicated files: {}", - files_to_copy.len(), - duplicated_files_detected.len() - ); - - let files_to_be_deleted = files_to_copy - .into_iter() - .map(|v| v.path) - .chain(duplicated_files_detected) - .collect::>(); - // set on_finished callback. - PipelineBuilder::set_purge_files_on_finished( - ctx.clone(), - files_to_be_deleted, - plan.stage_table_info.stage_info.copy_options.purge, - plan.stage_table_info.stage_info.clone(), - main_pipeline, - )?; - } - Ok(()) - } - - async fn on_no_files_to_copy(&self) -> Result { - // currently, there is only one thing that we care about: - // - // if `purge_duplicated_files_in_copy` and `purge` are all enabled, - // and there are duplicated files detected, we should clean them up immediately. - - // it might be better to reuse the PipelineBuilder::set_purge_files_on_finished, - // unfortunately, hooking the on_finished callback of a "blank" pipeline, - // e.g. `PipelineBuildResult::create` leads to runtime error (during pipeline execution). - - if self.plan.stage_table_info.stage_info.copy_options.purge - && !self - .plan - .stage_table_info - .duplicated_files_detected - .is_empty() - && self - .ctx - .get_settings() - .get_enable_purge_duplicated_files_in_copy()? - { - info!( - "purge_duplicated_files_in_copy enabled, number of duplicated files: {}", - self.plan.stage_table_info.duplicated_files_detected.len() - ); - - PipelineBuilder::purge_files_immediately( - self.ctx.clone(), - self.plan.stage_table_info.duplicated_files_detected.clone(), - self.plan.stage_table_info.stage_info.clone(), - ) - .await?; - } - Ok(PipelineBuildResult::create()) - } -} - -#[async_trait::async_trait] -impl Interpreter for CopyIntoTableInterpreter { - fn name(&self) -> &str { - "CopyIntoTableInterpreterV2" - } - - fn is_ddl(&self) -> bool { - false - } - - #[fastrace::trace] - #[async_backtrace::framed] - async fn execute2(&self) -> Result { - debug!("ctx.id" = self.ctx.get_id().as_str(); "copy_into_table_interpreter_execute_v2"); - - if check_deduplicate_label(self.ctx.clone()).await? { - return Ok(PipelineBuildResult::create()); - } - - if self.plan.no_file_to_copy { - info!("no file to copy"); - return self.on_no_files_to_copy().await; - } - - let (physical_plan, update_stream_meta) = self.build_physical_plan(&self.plan).await?; - let mut build_res = - build_query_pipeline_without_render_result_set(&self.ctx, &physical_plan).await?; - - // Build commit insertion pipeline. - { - let files_to_copy = self - .plan - .stage_table_info - .files_to_copy - .clone() - .unwrap_or_default(); - - let duplicated_files_detected = - self.plan.stage_table_info.duplicated_files_detected.clone(); - - self.commit_insertion( - &mut build_res.main_pipeline, - &self.plan, - files_to_copy, - duplicated_files_detected, - update_stream_meta, - unsafe { self.ctx.get_settings().get_deduplicate_label()? }, - ) - .await?; - } - - // Execute hook. - { - let hook_operator = HookOperator::create( - self.ctx.clone(), - self.plan.catalog_info.catalog_name().to_string(), - self.plan.database_name.to_string(), - self.plan.table_name.to_string(), - MutationKind::Insert, - LockTableOption::LockNoRetry, - ); - hook_operator.execute(&mut build_res.main_pipeline).await; - } - - Ok(build_res) - } - - fn inject_result(&self) -> Result { - let blocks = if self.plan.no_file_to_copy { - vec![DataBlock::empty_with_schema(self.plan.schema())] - } else { - self.get_copy_into_table_result()? 
- };
-
- Ok(Box::pin(DataBlockStream::create(None, blocks)))
- }
}
diff --git a/src/query/service/src/interpreters/interpreter_factory.rs b/src/query/service/src/interpreters/interpreter_factory.rs
index 099c026b36ced..b345d37a7b56b 100644
--- a/src/query/service/src/interpreters/interpreter_factory.rs
+++ b/src/query/service/src/interpreters/interpreter_factory.rs
@@ -159,13 +159,13 @@ impl InterpreterFactory {
 Plan::CopyIntoTable {
 s_expr,
 metadata,
- stage_context,
+ stage_table_info,
 overwrite,
 } => Ok(Arc::new(CopyIntoTableInterpreter::try_create(
 ctx,
 *s_expr.clone(),
 metadata.clone(),
- stage_context.clone(),
+ stage_table_info.clone(),
 *overwrite,
 )?)),
 Plan::CopyIntoLocation(copy_plan) => Ok(Arc::new(
diff --git a/src/query/service/src/interpreters/interpreter_replace.rs b/src/query/service/src/interpreters/interpreter_replace.rs
index 6018939ae07a2..9d7b974932c34 100644
--- a/src/query/service/src/interpreters/interpreter_replace.rs
+++ b/src/query/service/src/interpreters/interpreter_replace.rs
@@ -51,7 +51,6 @@ use parking_lot::RwLock;

 use crate::interpreters::common::check_deduplicate_label;
 use crate::interpreters::common::dml_build_update_stream_req;
-use crate::interpreters::interpreter_copy_into_table::CopyIntoTableInterpreter;
 use crate::interpreters::HookOperator;
 use crate::interpreters::Interpreter;
 use crate::interpreters::InterpreterPtr;
@@ -374,47 +373,48 @@ impl ReplaceInterpreter {
 #[async_backtrace::framed]
 async fn connect_input_source<'a>(
 &'a self,
- ctx: Arc<QueryContext>,
- source: &'a InsertInputSource,
- schema: DataSchemaRef,
- purge_info: &mut Option<(Vec<StageFileInfo>, StageInfo, CopyIntoTableOptions)>,
+ _ctx: Arc<QueryContext>,
+ _source: &'a InsertInputSource,
+ _schema: DataSchemaRef,
+ _purge_info: &mut Option<(Vec<StageFileInfo>, StageInfo, CopyIntoTableOptions)>,
 ) -> Result<ReplaceSourceCtx> {
- match source {
- InsertInputSource::Values(source) => self
- .connect_value_source(schema.clone(), source)
- .map(|root| ReplaceSourceCtx {
- root,
- select_ctx: None,
- update_stream_meta: vec![],
- bind_context: None,
- }),
-
- InsertInputSource::SelectPlan(plan) => {
- self.connect_query_plan_source(ctx.clone(), plan).await
- }
- InsertInputSource::Stage(plan) => match *plan.clone() {
- Plan::CopyIntoTable(copy_plan) => {
- let interpreter =
- CopyIntoTableInterpreter::try_create(ctx.clone(), *copy_plan.clone())?;
- let (physical_plan, _) = interpreter.build_physical_plan(&copy_plan).await?;
-
- // TODO optimization: if copy_plan.stage_table_info.files_to_copy is None, there should be a short-cut plan
-
- *purge_info = Some((
- copy_plan.stage_table_info.files_to_copy.unwrap_or_default(),
- copy_plan.stage_table_info.stage_info.clone(),
- copy_plan.stage_table_info.copy_into_table_options.clone(),
- ));
- Ok(ReplaceSourceCtx {
- root: Box::new(physical_plan),
- select_ctx: None,
- update_stream_meta: vec![],
- bind_context: None,
- })
- }
- _ => unreachable!("plan in InsertInputSource::Stag must be CopyIntoTable"),
- },
- }
+ // match source {
+ // InsertInputSource::Values(source) => self
+ // .connect_value_source(schema.clone(), source)
+ // .map(|root| ReplaceSourceCtx {
+ // root,
+ // select_ctx: None,
+ // update_stream_meta: vec![],
+ // bind_context: None,
+ // }),
+
+ // InsertInputSource::SelectPlan(plan) => {
+ // self.connect_query_plan_source(ctx.clone(), plan).await
+ // }
+ // InsertInputSource::Stage(plan) => match *plan.clone() {
+ // Plan::CopyIntoTable(copy_plan) => {
+ // let interpreter =
+ // CopyIntoTableInterpreter::try_create(ctx.clone(), *copy_plan.clone())?;
+ // let (physical_plan, _) =
interpreter.build_physical_plan(©_plan).await?; + + // // TODO optimization: if copy_plan.stage_table_info.files_to_copy is None, there should be a short-cut plan + + // *purge_info = Some(( + // copy_plan.stage_table_info.files_to_copy.unwrap_or_default(), + // copy_plan.stage_table_info.stage_info.clone(), + // copy_plan.stage_table_info.copy_into_table_options.clone(), + // )); + // Ok(ReplaceSourceCtx { + // root: Box::new(physical_plan), + // select_ctx: None, + // update_stream_meta: vec![], + // bind_context: None, + // }) + // } + // _ => unreachable!("plan in InsertInputSource::Stag must be CopyIntoTable"), + // }, + // } + todo!() } fn connect_value_source( diff --git a/src/query/service/src/pipelines/builders/builder_copy_into_table.rs b/src/query/service/src/pipelines/builders/builder_copy_into_table.rs index f72157c9f4efa..b6a02b5e202c5 100644 --- a/src/query/service/src/pipelines/builders/builder_copy_into_table.rs +++ b/src/query/service/src/pipelines/builders/builder_copy_into_table.rs @@ -25,8 +25,6 @@ use databend_common_expression::DataSchema; use databend_common_expression::DataSchemaRef; use databend_common_expression::DataSchemaRefExt; use databend_common_expression::Scalar; -use databend_common_meta_app::principal::FileFormatParams; -use databend_common_meta_app::principal::ParquetFileFormatParams; use databend_common_meta_app::schema::TableCopiedFileInfo; use databend_common_meta_app::schema::UpsertTableCopiedFileReq; use databend_common_pipeline_core::Pipeline; diff --git a/src/query/sql/src/planner/binder/copy_into_table.rs b/src/query/sql/src/planner/binder/copy_into_table.rs index 1535135ce49bf..c603ef58620e4 100644 --- a/src/query/sql/src/planner/binder/copy_into_table.rs +++ b/src/query/sql/src/planner/binder/copy_into_table.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
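// The recurring rewrite in the hunks below, side by side (a sketch — `purge`
// stands in for whichever option a given call site reads; this patch applies
// the same move to `max_files`, `disable_variant_check` and
// `return_failed_only`):
//
//     // before: copy options hung off the stage definition
//     let purge = stage_table_info.stage_info.copy_options.purge;
//     // after: per-statement options carried by StageTableInfo itself
//     let purge = stage_table_info.copy_into_table_options.purge;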
-use std::str::FromStr; use std::sync::Arc; use databend_common_ast::ast::ColumnID as AstColumnID; @@ -69,7 +68,6 @@ use crate::executor::physical_plans::MutationKind; use crate::optimizer::SExpr; use crate::plans::CopyIntoTablePlan; use crate::plans::Plan; -use crate::plans::StageContext; use crate::BindContext; use crate::Metadata; use crate::NameResolutionContext; @@ -88,13 +86,12 @@ impl<'a> Binder { .bind_copy_into_table_common(bind_context, stmt, location) .await?; copy_into_table_plan - .collect_files(self.ctx.as_ref(), &mut stage_table_info, stmt.force) + .collect_files(self.ctx.as_ref(), &mut stage_table_info) .await?; self.bind_copy_into_table_from_location( bind_context, copy_into_table_plan, stage_table_info, - stmt.force, ) .await } @@ -112,7 +109,7 @@ impl<'a> Binder { .await?; copy_into_table_plan - .collect_files(self.ctx.as_ref(), &mut stage_table_info, stmt.force) + .collect_files(self.ctx.as_ref(), &mut stage_table_info) .await?; self.bind_copy_from_query_into_table( bind_context, @@ -120,7 +117,6 @@ impl<'a> Binder { stage_table_info, select_list, alias, - stmt.force, ) .await } @@ -213,6 +209,7 @@ impl<'a> Binder { is_select: false, default_values, copy_into_location_options: Default::default(), + copy_into_table_options: stmt.options.clone(), }; let copy_into_plan = CopyIntoTablePlan { catalog_name, @@ -234,7 +231,6 @@ impl<'a> Binder { bind_ctx: &BindContext, copy_into_table_plan: CopyIntoTablePlan, stage_table_info: StageTableInfo, - force: bool, ) -> Result { let use_query = matches!(&stage_table_info.stage_info.file_format_params, FileFormatParams::Parquet(fmt) if fmt.missing_field_as == NullAs::Error); @@ -277,7 +273,6 @@ impl<'a> Binder { stage_table_info, &select_list, &None, - force, ) .await } else { @@ -297,13 +292,7 @@ impl<'a> Binder { Ok(Plan::CopyIntoTable { s_expr: Box::new(copy_into), metadata: self.metadata.clone(), - stage_context: Some(Box::new(StageContext { - purge: stage_table_info.stage_info.copy_options.purge, - force, - files_to_copy: stage_table_info.files_to_copy.unwrap_or_default(), - duplicated_files_detected: stage_table_info.duplicated_files_detected, - stage_info: stage_table_info.stage_info, - })), + stage_table_info: Some(Box::new(stage_table_info)), overwrite: false, }) } @@ -398,6 +387,7 @@ impl<'a> Binder { is_select: false, default_values: Some(default_values), copy_into_location_options: Default::default(), + copy_into_table_options: options, }; let copy_into_table_plan = CopyIntoTablePlan { @@ -415,7 +405,6 @@ impl<'a> Binder { bind_context, copy_into_table_plan, stage_table_info, - true, ) .await } @@ -429,7 +418,6 @@ impl<'a> Binder { stage_table_info: StageTableInfo, select_list: &'a [SelectTarget], alias: &Option, - force: bool, ) -> Result { let table_ctx = self.ctx.clone(); let (s_expr, mut from_context) = self @@ -483,10 +471,6 @@ impl<'a> Binder { // disable variant check to allow copy invalid JSON into tables let disable_variant_check = stage_table_info - .stage_info - .copy_options - let disable_variant_check = plan - .stage_table_info .copy_into_table_options .disable_variant_check; if disable_variant_check { @@ -513,13 +497,7 @@ impl<'a> Binder { Ok(Plan::CopyIntoTable { s_expr: Box::new(copy_into), metadata: self.metadata.clone(), - stage_context: Some(Box::new(StageContext { - purge: stage_table_info.stage_info.copy_options.purge, - force, - files_to_copy: stage_table_info.files_to_copy.unwrap_or_default(), - duplicated_files_detected: stage_table_info.duplicated_files_detected, - stage_info: 
stage_table_info.stage_info, - })), + stage_table_info: Some(Box::new(stage_table_info)), overwrite: false, }) } diff --git a/src/query/sql/src/planner/binder/insert.rs b/src/query/sql/src/planner/binder/insert.rs index 9b948f82ee40c..d2ae1fac7c203 100644 --- a/src/query/sql/src/planner/binder/insert.rs +++ b/src/query/sql/src/planner/binder/insert.rs @@ -183,7 +183,7 @@ impl Binder { Arc::new(source), )), metadata: self.metadata.clone(), - stage_context: None, + stage_table_info: None, overwrite: *overwrite, }) } diff --git a/src/query/sql/src/planner/optimizer/optimizer.rs b/src/query/sql/src/planner/optimizer/optimizer.rs index 0c9226bab26ba..85378b373b000 100644 --- a/src/query/sql/src/planner/optimizer/optimizer.rs +++ b/src/query/sql/src/planner/optimizer/optimizer.rs @@ -290,7 +290,7 @@ pub async fn optimize(mut opt_ctx: OptimizerContext, plan: Plan) -> Result Plan::CopyIntoTable { s_expr, metadata, - stage_context, + stage_table_info, overwrite, } => { let enable_distributed = opt_ctx.enable_distributed_optimization @@ -323,7 +323,7 @@ pub async fn optimize(mut opt_ctx: OptimizerContext, plan: Plan) -> Result Ok(Plan::CopyIntoTable { s_expr: Box::new(optimized), metadata, - stage_context, + stage_table_info, overwrite, }) } diff --git a/src/query/sql/src/planner/plans/copy_into_table.rs b/src/query/sql/src/planner/plans/copy_into_table.rs index 1f3de5764573f..03dbd8825e563 100644 --- a/src/query/sql/src/planner/plans/copy_into_table.rs +++ b/src/query/sql/src/planner/plans/copy_into_table.rs @@ -30,12 +30,10 @@ use databend_common_expression::DataSchema; use databend_common_expression::DataSchemaRef; use databend_common_expression::DataSchemaRefExt; use databend_common_expression::Scalar; -use databend_common_meta_app::principal::StageInfo; use databend_common_meta_app::principal::COPY_MAX_FILES_COMMIT_MSG; use databend_common_meta_app::principal::COPY_MAX_FILES_PER_COMMIT; use databend_common_metrics::storage::*; use databend_common_storage::init_stage_operator; -use databend_common_storage::StageFileInfo; use log::info; use super::Operator; @@ -59,15 +57,6 @@ pub struct CopyIntoTablePlan { pub mutation_kind: MutationKind, } -#[derive(Clone, Debug)] -pub struct StageContext { - pub purge: bool, - pub force: bool, - pub files_to_copy: Vec, - pub duplicated_files_detected: Vec, - pub stage_info: StageInfo, -} - impl Hash for CopyIntoTablePlan { fn hash(&self, state: &mut H) { self.catalog_name.hash(state); @@ -81,13 +70,10 @@ impl CopyIntoTablePlan { &self, ctx: &dyn TableContext, stage_table_info: &mut StageTableInfo, - force: bool, ) -> Result<()> { ctx.set_status_info("begin to list files"); let start = Instant::now(); - let max_files = stage_table_info.stage_info.copy_options.max_files; - let stage_table_info = &self.stage_table_info; let max_files = stage_table_info.copy_into_table_options.max_files; let max_files = if max_files == 0 { None @@ -191,26 +177,26 @@ impl CopyIntoTablePlan { } impl Debug for CopyIntoTablePlan { - fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { - let CopyIntoTablePlan { - catalog_info, - database_name, - table_name, - no_file_to_copy, - validation_mode, - stage_table_info, - query, - .. 
- } = self; - write!( - f, - "Copy into {:}.{database_name:}.{table_name:}", - catalog_info.catalog_name() - )?; - write!(f, ", no_file_to_copy: {no_file_to_copy:?}")?; - write!(f, ", validation_mode: {validation_mode:?}")?; - write!(f, ", from: {stage_table_info:?}")?; - write!(f, " query: {query:?}")?; + fn fmt(&self, _f: &mut Formatter) -> std::fmt::Result { + // let CopyIntoTablePlan { + // catalog_info, + // database_name, + // table_name, + // no_file_to_copy, + // validation_mode, + // stage_table_info, + // query, + // .. + // } = self; + // write!( + // f, + // "Copy into {:}.{database_name:}.{table_name:}", + // catalog_info.catalog_name() + // )?; + // write!(f, ", no_file_to_copy: {no_file_to_copy:?}")?; + // write!(f, ", validation_mode: {validation_mode:?}")?; + // write!(f, ", from: {stage_table_info:?}")?; + // write!(f, " query: {query:?}")?; Ok(()) } } diff --git a/src/query/sql/src/planner/plans/insert.rs b/src/query/sql/src/planner/plans/insert.rs index 726de7f7678d6..b04c639464623 100644 --- a/src/query/sql/src/planner/plans/insert.rs +++ b/src/query/sql/src/planner/plans/insert.rs @@ -133,74 +133,75 @@ pub(crate) fn format_insert_source( // let sub_tree = s_expr.to_format_tree(metadata, verbose)?; // children.push(sub_tree); - return Ok(FormatTreeNode::with_children( - format!("{plan_name} (subquery):"), - children, - ) - .format_pretty()?); - } - Ok(String::new()) - } - InsertInputSource::Values(values) => match values { - InsertValue::Values { .. } => Ok(FormatTreeNode::with_children( - format!("{plan_name} (values):"), - children, - ) - .format_pretty()?), - InsertValue::RawValues { .. } => Ok(FormatTreeNode::with_children( - format!("{plan_name} (rawvalues):"), - children, - ) - .format_pretty()?), - }, - InsertInputSource::Stage(plan) => match *plan.clone() { - Plan::CopyIntoTable(copy_plan) => { - let CopyIntoTablePlan { - no_file_to_copy, - from_attachment, - required_values_schema, - required_source_schema, - write_mode, - validation_mode, - stage_table_info, - enable_distributed, - .. - } = &*copy_plan; - let required_values_schema = required_values_schema - .fields() - .iter() - .map(|field| field.name().to_string()) - .collect::>() - .join(","); - let required_source_schema = required_source_schema - .fields() - .iter() - .map(|field| field.name().to_string()) - .collect::>() - .join(","); - let stage_node = vec![ - FormatTreeNode::new(format!("no_file_to_copy: {no_file_to_copy}")), - FormatTreeNode::new(format!("from_attachment: {from_attachment}")), - FormatTreeNode::new(format!( - "required_values_schema: [{required_values_schema}]" - )), - FormatTreeNode::new(format!( - "required_source_schema: [{required_source_schema}]" - )), - FormatTreeNode::new(format!("write_mode: {write_mode}")), - FormatTreeNode::new(format!("validation_mode: {validation_mode}")), - FormatTreeNode::new(format!("stage_table_info: {stage_table_info}")), - FormatTreeNode::new(format!("enable_distributed: {enable_distributed}")), - ]; - children.extend(stage_node); - Ok( - FormatTreeNode::with_children(format!("{plan_name} (stage):"), children) - .format_pretty()?, - ) - } - _ => unreachable!("plan in InsertInputSource::Stag must be CopyIntoTable"), - }, - } + // return Ok(FormatTreeNode::with_children( + // format!("{plan_name} (subquery):"), + // children, + // ) + // .format_pretty()?); + // } + // Ok(String::new()) + // } + // InsertInputSource::Values(values) => match values { + // InsertValue::Values { .. 
} => Ok(FormatTreeNode::with_children( + // format!("{plan_name} (values):"), + // children, + // ) + // .format_pretty()?), + // InsertValue::RawValues { .. } => Ok(FormatTreeNode::with_children( + // format!("{plan_name} (rawvalues):"), + // children, + // ) + // .format_pretty()?), + // }, + // InsertInputSource::Stage(plan) => match *plan.clone() { + // Plan::CopyIntoTable(copy_plan) => { + // let CopyIntoTablePlan { + // no_file_to_copy, + // from_attachment, + // required_values_schema, + // required_source_schema, + // write_mode, + // validation_mode, + // stage_table_info, + // enable_distributed, + // .. + // } = &*copy_plan; + // let required_values_schema = required_values_schema + // .fields() + // .iter() + // .map(|field| field.name().to_string()) + // .collect::>() + // .join(","); + // let required_source_schema = required_source_schema + // .fields() + // .iter() + // .map(|field| field.name().to_string()) + // .collect::>() + // .join(","); + // let stage_node = vec![ + // FormatTreeNode::new(format!("no_file_to_copy: {no_file_to_copy}")), + // FormatTreeNode::new(format!("from_attachment: {from_attachment}")), + // FormatTreeNode::new(format!( + // "required_values_schema: [{required_values_schema}]" + // )), + // FormatTreeNode::new(format!( + // "required_source_schema: [{required_source_schema}]" + // )), + // FormatTreeNode::new(format!("write_mode: {write_mode}")), + // FormatTreeNode::new(format!("validation_mode: {validation_mode}")), + // FormatTreeNode::new(format!("stage_table_info: {stage_table_info}")), + // FormatTreeNode::new(format!("enable_distributed: {enable_distributed}")), + // ]; + // children.extend(stage_node); + // Ok( + // FormatTreeNode::with_children(format!("{plan_name} (stage):"), children) + // .format_pretty()?, + // ) + // } + // _ => unreachable!("plan in InsertInputSource::Stag must be CopyIntoTable"), + // }, + // } + todo!() } impl std::fmt::Debug for Insert { diff --git a/src/query/sql/src/planner/plans/plan.rs b/src/query/sql/src/planner/plans/plan.rs index 6ed5ccda242d3..e555106e928ff 100644 --- a/src/query/sql/src/planner/plans/plan.rs +++ b/src/query/sql/src/planner/plans/plan.rs @@ -17,6 +17,7 @@ use std::fmt::Formatter; use std::sync::Arc; use databend_common_ast::ast::ExplainKind; +use databend_common_catalog::plan::StageTableInfo; use databend_common_catalog::query_kind::QueryKind; use databend_common_expression::types::DataType; use databend_common_expression::DataField; @@ -28,7 +29,6 @@ use super::CreateDictionaryPlan; use super::DropDictionaryPlan; use super::RenameDictionaryPlan; use super::ShowCreateDictionaryPlan; -use super::StageContext; use crate::binder::ExplainConfig; use crate::optimizer::SExpr; use crate::plans::copy_into_location::CopyIntoLocationPlan; @@ -241,7 +241,7 @@ pub enum Plan { CopyIntoTable { s_expr: Box, metadata: MetadataRef, - stage_context: Option>, + stage_table_info: Option>, overwrite: bool, }, CopyIntoLocation(CopyIntoLocationPlan), From 84851c2803d8519a4601a5478fccefab1c9f7eea Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Fri, 29 Nov 2024 08:31:49 +0800 Subject: [PATCH 04/22] update --- src/query/catalog/src/table.rs | 5 +- .../interpreters/access/privilege_access.rs | 2 +- ...py_into_table.rs => interpreter_append.rs} | 67 +++++---- .../src/interpreters/interpreter_factory.rs | 8 +- .../interpreters/interpreter_table_create.rs | 56 +++----- .../interpreter_table_modify_column.rs | 75 +++------- src/query/service/src/interpreters/mod.rs | 2 +- ...r_copy_into_table.rs => 
builder_append.rs} | 6 +- .../builders/builder_append_table.rs | 2 +- .../src/pipelines/builders/builder_commit.rs | 2 +- .../builder_distributed_insert_select.rs | 67 --------- .../pipelines/builders/builder_mutation.rs | 1 - .../service/src/pipelines/builders/mod.rs | 3 +- .../service/src/pipelines/pipeline_builder.rs | 5 +- .../transform_recursive_cte_source.rs | 3 +- .../src/schedulers/fragments/fragmenter.rs | 6 +- .../src/schedulers/fragments/plan_fragment.rs | 6 +- src/query/service/src/sessions/queue_mgr.rs | 2 +- src/query/service/src/test_kits/fixture.rs | 2 +- src/query/sql/src/executor/format.rs | 28 +--- src/query/sql/src/executor/physical_plan.rs | 32 ++--- .../sql/src/executor/physical_plan_builder.rs | 2 +- .../sql/src/executor/physical_plan_visitor.rs | 31 +---- .../sql/src/executor/physical_plans/common.rs | 2 - .../sql/src/executor/physical_plans/mod.rs | 6 +- ..._copy_into_table.rs => physical_append.rs} | 4 +- .../physical_distributed_insert_select.rs | 32 ----- .../sql/src/planner/binder/copy_into_table.rs | 82 ++++++++--- src/query/sql/src/planner/binder/insert.rs | 26 ++-- src/query/sql/src/planner/binder/util.rs | 2 +- .../sql/src/planner/format/display_plan.rs | 2 +- .../decorrelate/subquery_rewriter.rs | 2 +- .../dynamic_sample/dynamic_sample.rs | 2 +- src/query/sql/src/planner/optimizer/format.rs | 2 +- .../src/planner/optimizer/hyper_dp/dphyp.rs | 2 +- .../sql/src/planner/optimizer/optimizer.rs | 6 +- .../rule/rewrite/rule_semi_to_inner_join.rs | 2 +- src/query/sql/src/planner/optimizer/s_expr.rs | 4 +- .../plans/{copy_into_table.rs => append.rs} | 130 +++++++++++++----- src/query/sql/src/planner/plans/mod.rs | 4 +- src/query/sql/src/planner/plans/operator.rs | 32 ++--- src/query/sql/src/planner/plans/plan.rs | 5 +- src/query/storages/fuse/src/fuse_table.rs | 5 +- .../storages/fuse/src/operations/commit.rs | 5 +- .../common/processors/sink_commit.rs | 49 ++----- .../storages/fuse/src/operations/truncate.rs | 15 +- .../storages/hive/hive/src/hive_table.rs | 3 +- src/query/storages/memory/src/memory_table.rs | 3 +- 48 files changed, 356 insertions(+), 484 deletions(-) rename src/query/service/src/interpreters/{interpreter_copy_into_table.rs => interpreter_append.rs} (84%) rename src/query/service/src/pipelines/builders/{builder_copy_into_table.rs => builder_append.rs} (97%) delete mode 100644 src/query/service/src/pipelines/builders/builder_distributed_insert_select.rs rename src/query/sql/src/executor/physical_plans/{physical_copy_into_table.rs => physical_append.rs} (96%) delete mode 100644 src/query/sql/src/executor/physical_plans/physical_distributed_insert_select.rs rename src/query/sql/src/planner/plans/{copy_into_table.rs => append.rs} (72%) diff --git a/src/query/catalog/src/table.rs b/src/query/catalog/src/table.rs index 916d170f15633..874bdad074c6d 100644 --- a/src/query/catalog/src/table.rs +++ b/src/query/catalog/src/table.rs @@ -37,7 +37,6 @@ use databend_common_meta_types::MetaId; use databend_common_pipeline_core::Pipeline; use databend_common_storage::Histogram; use databend_common_storage::StorageMetrics; -use databend_storages_common_table_meta::meta::SnapshotId; use databend_storages_common_table_meta::meta::TableSnapshot; use databend_storages_common_table_meta::table::ChangeType; use databend_storages_common_table_meta::table::OPT_KEY_TEMP_PREFIX; @@ -236,7 +235,7 @@ pub trait Table: Sync + Send { copied_files: Option, update_stream_meta: Vec, overwrite: bool, - prev_snapshot_id: Option, + forbid_occ_retry: bool, _deduplicated_label: Option, 
) -> Result<()> { let (_, _, _, _, _, _) = ( @@ -245,7 +244,7 @@ pub trait Table: Sync + Send { update_stream_meta, pipeline, overwrite, - prev_snapshot_id, + forbid_occ_retry, ); Ok(()) diff --git a/src/query/service/src/interpreters/access/privilege_access.rs b/src/query/service/src/interpreters/access/privilege_access.rs index b9cb3702b8442..b4a1c522e1388 100644 --- a/src/query/service/src/interpreters/access/privilege_access.rs +++ b/src/query/service/src/interpreters/access/privilege_access.rs @@ -1164,7 +1164,7 @@ impl AccessChecker for PrivilegeAccess { self.validate_access(&GrantObject::Global, UserPrivilegeType::Alter, false, false) .await?; } - Plan::CopyIntoTable { .. } => { + Plan::Append { .. } => { // match &plan.source{ // } diff --git a/src/query/service/src/interpreters/interpreter_copy_into_table.rs b/src/query/service/src/interpreters/interpreter_append.rs similarity index 84% rename from src/query/service/src/interpreters/interpreter_copy_into_table.rs rename to src/query/service/src/interpreters/interpreter_append.rs index 88848727f340a..fac02fd5b4ac0 100644 --- a/src/query/service/src/interpreters/interpreter_copy_into_table.rs +++ b/src/query/service/src/interpreters/interpreter_append.rs @@ -16,6 +16,7 @@ use std::sync::Arc; use databend_common_catalog::lock::LockTableOption; use databend_common_catalog::plan::StageTableInfo; +use databend_common_catalog::table::TableExt; use databend_common_exception::Result; use databend_common_expression::types::Int32Type; use databend_common_expression::types::StringType; @@ -25,6 +26,7 @@ use databend_common_expression::SendableDataBlockStream; use databend_common_sql::executor::physical_plans::MutationKind; use databend_common_sql::executor::PhysicalPlanBuilder; use databend_common_sql::optimizer::SExpr; +use databend_common_sql::plans::AppendType; use log::debug; use log::info; @@ -37,22 +39,23 @@ use crate::pipelines::PipelineBuilder; use crate::schedulers::build_query_pipeline_without_render_result_set; use crate::sessions::QueryContext; use crate::sessions::TableContext; -use crate::sql::plans::CopyIntoTablePlan; +use crate::sql::plans::Append; use crate::sql::MetadataRef; use crate::stream::DataBlockStream; -pub struct CopyIntoTableInterpreter { +pub struct AppendInterpreter { ctx: Arc, s_expr: SExpr, metadata: MetadataRef, stage_table_info: Option>, overwrite: bool, + col_type_modified: bool, } #[async_trait::async_trait] -impl Interpreter for CopyIntoTableInterpreter { +impl Interpreter for AppendInterpreter { fn name(&self) -> &str { - "CopyIntoTableInterpreterV2" + "AppendInterpreter" } fn is_ddl(&self) -> bool { @@ -67,7 +70,21 @@ impl Interpreter for CopyIntoTableInterpreter { return Ok(PipelineBuildResult::create()); } - // build source and append pipeline + let copy_into_table: Append = self.s_expr.plan().clone().try_into()?; + let (target_table, catalog, database, table) = { + let metadata = self.metadata.read(); + let t = metadata.table(copy_into_table.table_index); + ( + t.table(), + t.catalog().to_string(), + t.database().to_string(), + t.name().to_string(), + ) + }; + + target_table.check_mutable()?; + + // 1. build source and append pipeline let mut build_res = { let mut physical_plan_builder = PhysicalPlanBuilder::new(self.metadata.clone(), self.ctx.clone(), false); @@ -77,16 +94,7 @@ impl Interpreter for CopyIntoTableInterpreter { build_query_pipeline_without_render_result_set(&self.ctx, &physical_plan).await? 
}; - // build commit pipeline - let copy_into_table: CopyIntoTablePlan = self.s_expr.plan().clone().try_into()?; - let target_table = self - .ctx - .get_table( - ©_into_table.catalog_name, - ©_into_table.database_name, - ©_into_table.table_name, - ) - .await?; + // 2. build commit pipeline let copied_files_meta_req = match &self.stage_table_info { Some(stage_table_info) => PipelineBuilder::build_upsert_copied_files_to_meta_req( self.ctx.clone(), @@ -107,11 +115,11 @@ impl Interpreter for CopyIntoTableInterpreter { copied_files_meta_req, update_stream_meta, self.overwrite, - None, + self.col_type_modified, unsafe { self.ctx.get_settings().get_deduplicate_label()? }, )?; - // Purge files on pipeline finished. + // 3. Purge files on pipeline finished. if let Some(stage_table_info) = &self.stage_table_info { let files_to_copy = stage_table_info .files_to_copy @@ -137,14 +145,13 @@ impl Interpreter for CopyIntoTableInterpreter { )?; } - // Execute hook. + // 4. Execute hook. { - let copy_into_table: CopyIntoTablePlan = self.s_expr.plan().clone().try_into()?; let hook_operator = HookOperator::create( self.ctx.clone(), - copy_into_table.catalog_name.to_string(), - copy_into_table.database_name.to_string(), - copy_into_table.table_name.to_string(), + catalog, + database, + table, MutationKind::Insert, LockTableOption::LockNoRetry, ); @@ -155,33 +162,33 @@ impl Interpreter for CopyIntoTableInterpreter { } fn inject_result(&self) -> Result { - let copy_into_table: CopyIntoTablePlan = self.s_expr.plan().clone().try_into()?; - match ©_into_table.mutation_kind { - MutationKind::CopyInto => { + let copy_into_table: Append = self.s_expr.plan().clone().try_into()?; + match ©_into_table.append_type { + AppendType::CopyInto => { let blocks = self.get_copy_into_table_result()?; Ok(Box::pin(DataBlockStream::create(None, blocks))) } - MutationKind::Insert => Ok(Box::pin(DataBlockStream::create(None, vec![]))), - _ => unreachable!(), + AppendType::Insert => Ok(Box::pin(DataBlockStream::create(None, vec![]))), } } } -impl CopyIntoTableInterpreter { - /// Create a CopyInterpreter with context and [`CopyIntoTablePlan`]. 
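// How the renamed interpreter recovers its plan (a sketch assembled from the
// hunks above; `Append: TryFrom<RelOperator>` mirrors the conversions added
// in operator.rs):
//
//     let append: Append = self.s_expr.plan().clone().try_into()?;
//     let metadata = self.metadata.read();
//     let target = metadata.table(append.table_index).table();
//
// i.e. the target table now travels through the optimizer as a metadata index
// on the relational operator rather than as catalog/database/table strings.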
+impl AppendInterpreter { pub fn try_create( ctx: Arc, s_expr: SExpr, metadata: MetadataRef, stage_table_info: Option>, overwrite: bool, + col_type_modified: bool, ) -> Result { - Ok(CopyIntoTableInterpreter { + Ok(AppendInterpreter { ctx, s_expr, metadata, stage_table_info, overwrite, + col_type_modified, }) } diff --git a/src/query/service/src/interpreters/interpreter_factory.rs b/src/query/service/src/interpreters/interpreter_factory.rs index b345d37a7b56b..08eccee820cca 100644 --- a/src/query/service/src/interpreters/interpreter_factory.rs +++ b/src/query/service/src/interpreters/interpreter_factory.rs @@ -36,13 +36,13 @@ use super::interpreter_table_set_options::SetOptionsInterpreter; use super::interpreter_user_stage_drop::DropUserStageInterpreter; use super::*; use crate::interpreters::access::Accessor; +use crate::interpreters::interpreter_append::AppendInterpreter; use crate::interpreters::interpreter_catalog_drop::DropCatalogInterpreter; use crate::interpreters::interpreter_connection_create::CreateConnectionInterpreter; use crate::interpreters::interpreter_connection_desc::DescConnectionInterpreter; use crate::interpreters::interpreter_connection_drop::DropConnectionInterpreter; use crate::interpreters::interpreter_connection_show::ShowConnectionsInterpreter; use crate::interpreters::interpreter_copy_into_location::CopyIntoLocationInterpreter; -use crate::interpreters::interpreter_copy_into_table::CopyIntoTableInterpreter; use crate::interpreters::interpreter_file_format_create::CreateFileFormatInterpreter; use crate::interpreters::interpreter_file_format_drop::DropFileFormatInterpreter; use crate::interpreters::interpreter_file_format_show::ShowFileFormatsInterpreter; @@ -156,17 +156,19 @@ impl InterpreterFactory { *graphical, )?)), - Plan::CopyIntoTable { + Plan::Append { s_expr, metadata, stage_table_info, overwrite, - } => Ok(Arc::new(CopyIntoTableInterpreter::try_create( + forbid_occ_retry: col_type_modified, + } => Ok(Arc::new(AppendInterpreter::try_create( ctx, *s_expr.clone(), metadata.clone(), stage_table_info.clone(), *overwrite, + *col_type_modified, )?)), Plan::CopyIntoLocation(copy_plan) => Ok(Arc::new( CopyIntoLocationInterpreter::try_create(ctx, copy_plan.clone())?, diff --git a/src/query/service/src/interpreters/interpreter_table_create.rs b/src/query/service/src/interpreters/interpreter_table_create.rs index 16f84053a8cdc..1794375405150 100644 --- a/src/query/service/src/interpreters/interpreter_table_create.rs +++ b/src/query/service/src/interpreters/interpreter_table_create.rs @@ -38,10 +38,8 @@ use databend_common_meta_app::schema::TableNameIdent; use databend_common_meta_app::schema::TableStatistics; use databend_common_meta_types::MatchSeq; use databend_common_pipeline_core::ExecutionInfo; -use databend_common_sql::executor::physical_plans::MutationKind; use databend_common_sql::field_default_value; -use databend_common_sql::optimizer::SExpr; -use databend_common_sql::plans::CopyIntoTablePlan; +use databend_common_sql::plans::create_append_plan_from_subquery; use databend_common_sql::plans::CreateTablePlan; use databend_common_storages_fuse::io::MetaReaders; use databend_common_storages_fuse::FuseStorageFormat; @@ -66,8 +64,8 @@ use crate::interpreters::common::table_option_validation::is_valid_create_opt; use crate::interpreters::common::table_option_validation::is_valid_data_retention_period; use crate::interpreters::common::table_option_validation::is_valid_random_seed; use crate::interpreters::common::table_option_validation::is_valid_row_per_block; 
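// The CTAS path in the hunk below stops hand-building a physical
// CopyIntoTable plan and instead routes through the shared logical append
// plan (call shape as used below; `false` is the overwrite flag, the other
// arguments are abbreviated here):
//
//     let append_plan = create_append_plan_from_subquery(
//         &select_plan, catalog, database, table, schema, false, ctx,
//     ).await?;
//     InterpreterFactory::get(ctx, &append_plan).await?.execute2().await?;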
-use crate::interpreters::interpreter_copy_into_table::CopyIntoTableInterpreter; use crate::interpreters::Interpreter; +use crate::interpreters::InterpreterFactory; use crate::pipelines::PipelineBuildResult; use crate::sessions::QueryContext; use crate::sessions::TableContext; @@ -219,44 +217,28 @@ impl CreateTableInterpreter { // For the situation above, we implicitly cast the data type when inserting data. // The casting and schema checking is in interpreter_insert.rs, function check_schema_cast. - let _table_info = TableInfo::new( + let table_info = TableInfo::new( &self.plan.database, &self.plan.table, TableIdent::new(table_id, table_id_seq), table_meta, ); - - let (project_columns, source, metadata) = match select_plan.as_ref() { - Plan::Query { - bind_context, - s_expr, - metadata, - .. - } => ( - Some(bind_context.columns.clone()), - *s_expr.clone(), - metadata.clone(), - ), - _ => unreachable!(), - }; - - let insert_plan = CopyIntoTablePlan { - catalog_name: self.plan.catalog.clone(), - database_name: self.plan.database.clone(), - table_name: self.plan.table.clone(), - required_values_schema: Arc::new(self.plan.schema.clone().into()), - values_consts: vec![], - required_source_schema: Arc::new(self.plan.schema.clone().into()), - project_columns, - mutation_kind: MutationKind::Insert, - }; - - let s_expr = SExpr::create_unary(Arc::new(insert_plan.into()), Arc::new(source)); - - let mut pipeline = - CopyIntoTableInterpreter::try_create(self.ctx.clone(), s_expr, metadata, None, false)? - .execute2() - .await?; + let table = self.ctx.build_table_by_table_info(&table_info, None)?; + + let append_plan = create_append_plan_from_subquery( + &select_plan, + self.plan.catalog.clone(), + self.plan.database.clone(), + table, + Arc::new(self.plan.schema.clone().into()), + false, + self.ctx.clone(), + ) + .await?; + let mut pipeline = InterpreterFactory::get(self.ctx.clone(), &append_plan) + .await? 
+ .execute2() + .await?; let db_name = self.plan.database.clone(); let table_name = self.plan.table.clone(); diff --git a/src/query/service/src/interpreters/interpreter_table_modify_column.rs b/src/query/service/src/interpreters/interpreter_table_modify_column.rs index d749c81bcc954..cc6ca8736ca10 100644 --- a/src/query/service/src/interpreters/interpreter_table_modify_column.rs +++ b/src/query/service/src/interpreters/interpreter_table_modify_column.rs @@ -33,16 +33,12 @@ use databend_common_meta_app::schema::SetTableColumnMaskPolicyReq; use databend_common_meta_app::schema::TableMeta; use databend_common_meta_app::schema::UpdateTableMetaReq; use databend_common_meta_types::MatchSeq; -use databend_common_sql::executor::physical_plans::DistributedInsertSelect; -use databend_common_sql::executor::PhysicalPlan; -use databend_common_sql::executor::PhysicalPlanBuilder; use databend_common_sql::field_default_value; +use databend_common_sql::plans::create_append_plan_from_subquery; use databend_common_sql::plans::ModifyColumnAction; use databend_common_sql::plans::ModifyTableColumnPlan; -use databend_common_sql::plans::Plan; use databend_common_sql::BloomIndexColumns; use databend_common_sql::Planner; -use databend_common_storages_fuse::FuseTable; use databend_common_storages_stream::stream_table::STREAM_ENGINE; use databend_common_storages_view::view_table::VIEW_ENGINE; use databend_common_users::UserApiProvider; @@ -50,10 +46,10 @@ use databend_enterprise_data_mask_feature::get_datamask_handler; use databend_storages_common_index::BloomIndex; use databend_storages_common_table_meta::table::OPT_KEY_BLOOM_INDEX_COLUMNS; +use super::InterpreterFactory; use crate::interpreters::common::check_referenced_computed_columns; use crate::interpreters::Interpreter; use crate::pipelines::PipelineBuildResult; -use crate::schedulers::build_query_pipeline_without_render_result_set; use crate::sessions::QueryContext; use crate::sessions::TableContext; @@ -164,12 +160,6 @@ impl ModifyTableColumnInterpreter { let catalog_name = table_info.catalog(); let catalog = self.ctx.get_catalog(catalog_name).await?; - let fuse_table = FuseTable::try_from_table(table.as_ref())?; - let prev_snapshot_id = fuse_table - .read_table_snapshot() - .await - .map_or(None, |v| v.map(|snapshot| snapshot.snapshot_id)); - let mut bloom_index_cols = vec![]; if let Some(v) = table_info.options().get(OPT_KEY_BLOOM_INDEX_COLUMNS) { if let BloomIndexColumns::Specify(cols) = v.parse::()? { @@ -359,54 +349,23 @@ impl ModifyTableColumnInterpreter { let mut planner = Planner::new(self.ctx.clone()); let (plan, _extras) = planner.plan_sql(&sql).await?; - // 3. build physical plan by plan - let (select_plan, select_column_bindings) = match plan { - Plan::Query { - s_expr, - metadata, - bind_context, - .. - } => { - let mut builder1 = - PhysicalPlanBuilder::new(metadata.clone(), self.ctx.clone(), false); - ( - builder1.build(&s_expr, bind_context.column_set()).await?, - bind_context.columns.clone(), - ) - } - _ => unreachable!(), - }; - - // 4. define select schema and insert schema of DistributedInsertSelect plan table_info.meta.schema = new_schema.clone().into(); - let new_table = FuseTable::try_create(table_info)?; - - // 5. 
build DistributedInsertSelect plan - let insert_plan = - PhysicalPlan::DistributedInsertSelect(Box::new(DistributedInsertSelect { - plan_id: select_plan.get_id(), - input: Box::new(select_plan), - table_info: new_table.get_table_info().clone(), - select_schema: Arc::new(Arc::new(schema).into()), - select_column_bindings, - insert_schema: Arc::new(Arc::new(new_schema).into()), - cast_needed: true, - })); - let mut build_res = - build_query_pipeline_without_render_result_set(&self.ctx, &insert_plan).await?; - - // 6. commit new meta schema and snapshots - new_table.commit_insertion( - self.ctx.clone(), - &mut build_res.main_pipeline, - None, - vec![], + let new_table = self.ctx.build_table_by_table_info(&table_info, None)?; + + let append_plan = create_append_plan_from_subquery( + &plan, + self.plan.catalog.clone(), + self.plan.database.clone(), + new_table, + Arc::new(DataSchema::from(&new_schema)), true, - prev_snapshot_id, - None, - )?; - - Ok(build_res) + self.ctx.clone(), + ) + .await?; + InterpreterFactory::get(self.ctx.clone(), &append_plan) + .await? + .execute2() + .await } // unset data mask policy to a column is a ee feature. diff --git a/src/query/service/src/interpreters/mod.rs b/src/query/service/src/interpreters/mod.rs index 171ece337a970..2865542b1b6e6 100644 --- a/src/query/service/src/interpreters/mod.rs +++ b/src/query/service/src/interpreters/mod.rs @@ -16,6 +16,7 @@ mod access; pub(crate) mod common; mod hook; mod interpreter; +mod interpreter_append; mod interpreter_catalog_create; mod interpreter_catalog_drop; mod interpreter_catalog_show_create; @@ -27,7 +28,6 @@ mod interpreter_connection_desc; mod interpreter_connection_drop; mod interpreter_connection_show; mod interpreter_copy_into_location; -mod interpreter_copy_into_table; mod interpreter_data_mask_create; mod interpreter_data_mask_desc; mod interpreter_data_mask_drop; diff --git a/src/query/service/src/pipelines/builders/builder_copy_into_table.rs b/src/query/service/src/pipelines/builders/builder_append.rs similarity index 97% rename from src/query/service/src/pipelines/builders/builder_copy_into_table.rs rename to src/query/service/src/pipelines/builders/builder_append.rs index b6a02b5e202c5..e8f5f12299d0c 100644 --- a/src/query/service/src/pipelines/builders/builder_copy_into_table.rs +++ b/src/query/service/src/pipelines/builders/builder_append.rs @@ -29,7 +29,7 @@ use databend_common_meta_app::schema::TableCopiedFileInfo; use databend_common_meta_app::schema::UpsertTableCopiedFileReq; use databend_common_pipeline_core::Pipeline; use databend_common_pipeline_transforms::processors::TransformPipelineHelper; -use databend_common_sql::executor::physical_plans::CopyIntoTable; +use databend_common_sql::executor::physical_plans::PhysicalAppend; use databend_common_storage::StageFileInfo; use log::debug; use log::info; @@ -41,7 +41,7 @@ use crate::pipelines::PipelineBuilder; use crate::sessions::QueryContext; impl PipelineBuilder { - pub(crate) fn build_copy_into_table(&mut self, copy: &CopyIntoTable) -> Result<()> { + pub(crate) fn build_copy_into_table(&mut self, copy: &PhysicalAppend) -> Result<()> { let to_table = self.ctx.build_table_by_table_info(©.table_info, None)?; self.ctx .set_read_block_thresholds(to_table.get_block_thresholds()); @@ -111,7 +111,7 @@ impl PipelineBuilder { fn build_append_data_pipeline( ctx: Arc, main_pipeline: &mut Pipeline, - plan: &CopyIntoTable, + plan: &PhysicalAppend, source_schema: Arc, to_table: Arc, ) -> Result<()> { diff --git 
a/src/query/service/src/pipelines/builders/builder_append_table.rs b/src/query/service/src/pipelines/builders/builder_append_table.rs index 90e151df25502..379061e6e0733 100644 --- a/src/query/service/src/pipelines/builders/builder_append_table.rs +++ b/src/query/service/src/pipelines/builders/builder_append_table.rs @@ -47,7 +47,7 @@ impl PipelineBuilder { copied_files, update_stream_meta, overwrite, - None, + false, deduplicated_label, )?; diff --git a/src/query/service/src/pipelines/builders/builder_commit.rs b/src/query/service/src/pipelines/builders/builder_commit.rs index e4becfd76f551..993f4445a4592 100644 --- a/src/query/service/src/pipelines/builders/builder_commit.rs +++ b/src/query/service/src/pipelines/builders/builder_commit.rs @@ -72,7 +72,7 @@ impl PipelineBuilder { snapshot_gen.clone(), input, None, - None, + false, plan.deduplicated_label.clone(), ) }) diff --git a/src/query/service/src/pipelines/builders/builder_distributed_insert_select.rs b/src/query/service/src/pipelines/builders/builder_distributed_insert_select.rs deleted file mode 100644 index ef2bc29e131b3..0000000000000 --- a/src/query/service/src/pipelines/builders/builder_distributed_insert_select.rs +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
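The two `None` → `false` argument changes above are call-site fallout from a signature change that lands later in this patch: `commit_insertion` and `CommitSink::try_create` drop the `prev_snapshot_id: Option<SnapshotId>` parameter in favor of `forbid_occ_retry: bool`. A minimal sketch of the new call shape, with all surrounding bindings assumed to be in scope:

    // Sketch only; parameter order follows the fuse_table.rs hunk later in
    // this patch, and every binding here is illustrative.
    table.commit_insertion(
        ctx.clone(),
        &mut pipeline,
        copied_files,        // Option<UpsertTableCopiedFileReq>
        update_stream_meta,  // Vec<UpdateStreamMetaReq>
        overwrite,
        false,               // forbid_occ_retry: was `prev_snapshot_id: None`
        deduplicated_label,  // Option<String>
    )?;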
- -use databend_common_exception::Result; -use databend_common_pipeline_transforms::processors::TransformPipelineHelper; -use databend_common_sql::executor::physical_plans::DistributedInsertSelect; - -use crate::pipelines::processors::TransformCastSchema; -use crate::pipelines::PipelineBuilder; - -impl PipelineBuilder { - pub fn build_distributed_insert_select( - &mut self, - insert_select: &DistributedInsertSelect, - ) -> Result<()> { - let select_schema = &insert_select.select_schema; - let insert_schema = &insert_select.insert_schema; - - self.build_pipeline(&insert_select.input)?; - - // should render result for select - PipelineBuilder::build_result_projection( - &self.func_ctx, - insert_select.input.output_schema()?, - &insert_select.select_column_bindings, - &mut self.main_pipeline, - false, - )?; - - if insert_select.cast_needed { - self.main_pipeline.try_add_transformer(|| { - TransformCastSchema::try_new( - select_schema.clone(), - insert_schema.clone(), - self.func_ctx.clone(), - ) - })?; - } - - let table = self - .ctx - .build_table_by_table_info(&insert_select.table_info, None)?; - - let source_schema = insert_schema; - Self::fill_and_reorder_columns( - self.ctx.clone(), - &mut self.main_pipeline, - table.clone(), - source_schema.clone(), - )?; - - table.append_data(self.ctx.clone(), &mut self.main_pipeline)?; - - Ok(()) - } -} diff --git a/src/query/service/src/pipelines/builders/builder_mutation.rs b/src/query/service/src/pipelines/builders/builder_mutation.rs index 18b64ffe59f0a..8195ed8103c70 100644 --- a/src/query/service/src/pipelines/builders/builder_mutation.rs +++ b/src/query/service/src/pipelines/builders/builder_mutation.rs @@ -41,7 +41,6 @@ use databend_common_storages_fuse::FuseTable; use crate::pipelines::processors::transforms::TransformAddComputedColumns; use crate::pipelines::processors::TransformResortAddOnWithoutSourceSchema; use crate::pipelines::PipelineBuilder; - impl PipelineBuilder { // build mutation serialize and mutation pipeline pub(crate) fn build_mutation(&mut self, merge_into: &Mutation) -> Result<()> { diff --git a/src/query/service/src/pipelines/builders/mod.rs b/src/query/service/src/pipelines/builders/mod.rs index a35c735954af7..ce00718b0d466 100644 --- a/src/query/service/src/pipelines/builders/mod.rs +++ b/src/query/service/src/pipelines/builders/mod.rs @@ -14,14 +14,13 @@ mod builder_add_stream_column; mod builder_aggregate; +mod builder_append; mod builder_append_table; mod builder_async_function; mod builder_column_mutation; mod builder_commit; mod builder_compact; mod builder_copy_into_location; -mod builder_copy_into_table; -mod builder_distributed_insert_select; mod builder_exchange; mod builder_fill_missing_columns; mod builder_filter; diff --git a/src/query/service/src/pipelines/pipeline_builder.rs b/src/query/service/src/pipelines/pipeline_builder.rs index 8683539205fb6..3334defe6bd02 100644 --- a/src/query/service/src/pipelines/pipeline_builder.rs +++ b/src/query/service/src/pipelines/pipeline_builder.rs @@ -180,9 +180,6 @@ impl PipelineBuilder { PhysicalPlan::ExchangeSink(sink) => self.build_exchange_sink(sink), PhysicalPlan::ExchangeSource(source) => self.build_exchange_source(source), PhysicalPlan::UnionAll(union_all) => self.build_union_all(union_all), - PhysicalPlan::DistributedInsertSelect(insert_select) => { - self.build_distributed_insert_select(insert_select) - } PhysicalPlan::ProjectSet(project_set) => self.build_project_set(project_set), PhysicalPlan::Udf(udf) => self.build_udf(udf), PhysicalPlan::Exchange(_) => 
Err(ErrorCode::Internal( @@ -198,7 +195,7 @@ impl PipelineBuilder { } // Copy into. - PhysicalPlan::CopyIntoTable(copy) => self.build_copy_into_table(copy), + PhysicalPlan::Append(copy) => self.build_copy_into_table(copy), PhysicalPlan::CopyIntoLocation(copy) => self.build_copy_into_location(copy), // Replace. diff --git a/src/query/service/src/pipelines/processors/transforms/transform_recursive_cte_source.rs b/src/query/service/src/pipelines/processors/transforms/transform_recursive_cte_source.rs index a93ac4eaf44a7..772517afac01c 100644 --- a/src/query/service/src/pipelines/processors/transforms/transform_recursive_cte_source.rs +++ b/src/query/service/src/pipelines/processors/transforms/transform_recursive_cte_source.rs @@ -323,10 +323,9 @@ async fn create_memory_table_for_cte_scan( | PhysicalPlan::ConstantTableScan(_) | PhysicalPlan::ExpressionScan(_) | PhysicalPlan::CacheScan(_) - | PhysicalPlan::DistributedInsertSelect(_) | PhysicalPlan::ExchangeSource(_) | PhysicalPlan::ExchangeSink(_) - | PhysicalPlan::CopyIntoTable(_) + | PhysicalPlan::Append(_) | PhysicalPlan::CopyIntoLocation(_) | PhysicalPlan::ReplaceAsyncSourcer(_) | PhysicalPlan::ReplaceDeduplicate(_) diff --git a/src/query/service/src/schedulers/fragments/fragmenter.rs b/src/query/service/src/schedulers/fragments/fragmenter.rs index f3ad2db19de48..118e526ff4739 100644 --- a/src/query/service/src/schedulers/fragments/fragmenter.rs +++ b/src/query/service/src/schedulers/fragments/fragmenter.rs @@ -18,13 +18,13 @@ use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; use databend_common_sql::executor::physical_plans::CompactSource; use databend_common_sql::executor::physical_plans::ConstantTableScan; -use databend_common_sql::executor::physical_plans::CopyIntoTable; use databend_common_sql::executor::physical_plans::Exchange; use databend_common_sql::executor::physical_plans::ExchangeSink; use databend_common_sql::executor::physical_plans::ExchangeSource; use databend_common_sql::executor::physical_plans::FragmentKind; use databend_common_sql::executor::physical_plans::HashJoin; use databend_common_sql::executor::physical_plans::MutationSource; +use databend_common_sql::executor::physical_plans::PhysicalAppend; use databend_common_sql::executor::physical_plans::Recluster; use databend_common_sql::executor::physical_plans::ReplaceInto; use databend_common_sql::executor::physical_plans::TableScan; @@ -178,9 +178,9 @@ impl PhysicalPlanReplacer for Fragmenter { }))) } - fn replace_copy_into_table(&mut self, plan: &CopyIntoTable) -> Result { + fn replace_copy_into_table(&mut self, plan: &PhysicalAppend) -> Result { let input = self.replace(&plan.input)?; - Ok(PhysicalPlan::CopyIntoTable(Box::new(CopyIntoTable { + Ok(PhysicalPlan::Append(Box::new(PhysicalAppend { plan_id: plan.plan_id, input: Box::new(input), ..plan.clone() diff --git a/src/query/service/src/schedulers/fragments/plan_fragment.rs b/src/query/service/src/schedulers/fragments/plan_fragment.rs index 86aa3c20c81aa..d4a1b26654c14 100644 --- a/src/query/service/src/schedulers/fragments/plan_fragment.rs +++ b/src/query/service/src/schedulers/fragments/plan_fragment.rs @@ -27,8 +27,8 @@ use databend_common_expression::Value; use databend_common_settings::ReplaceIntoShuffleStrategy; use databend_common_sql::executor::physical_plans::CompactSource; use databend_common_sql::executor::physical_plans::ConstantTableScan; -use databend_common_sql::executor::physical_plans::CopyIntoTable; use 
databend_common_sql::executor::physical_plans::MutationSource; +use databend_common_sql::executor::physical_plans::PhysicalAppend; use databend_common_sql::executor::physical_plans::Recluster; use databend_common_sql::executor::physical_plans::ReplaceDeduplicate; use databend_common_sql::executor::physical_plans::ReplaceInto; @@ -535,9 +535,9 @@ impl PhysicalPlanReplacer for ReplaceReadSource { })) } - fn replace_copy_into_table(&mut self, plan: &CopyIntoTable) -> Result { + fn replace_copy_into_table(&mut self, plan: &PhysicalAppend) -> Result { let input = self.replace(&plan.input)?; - Ok(PhysicalPlan::CopyIntoTable(Box::new(CopyIntoTable { + Ok(PhysicalPlan::Append(Box::new(PhysicalAppend { plan_id: plan.plan_id, input: Box::new(input), ..plan.clone() diff --git a/src/query/service/src/sessions/queue_mgr.rs b/src/query/service/src/sessions/queue_mgr.rs index 270ea613db3d5..baf4f448cbae8 100644 --- a/src/query/service/src/sessions/queue_mgr.rs +++ b/src/query/service/src/sessions/queue_mgr.rs @@ -398,7 +398,7 @@ impl QueryEntry { Plan::InsertMultiTable(_) | Plan::Replace(_) | Plan::DataMutation { .. } - | Plan::CopyIntoTable { .. } + | Plan::Append { .. } | Plan::CopyIntoLocation(_) => { return true; } diff --git a/src/query/service/src/test_kits/fixture.rs b/src/query/service/src/test_kits/fixture.rs index e46472ab1754c..2fb6e373d75db 100644 --- a/src/query/service/src/test_kits/fixture.rs +++ b/src/query/service/src/test_kits/fixture.rs @@ -847,7 +847,7 @@ impl TestFixture { None, vec![], overwrite, - None, + false, None, )?; } else { diff --git a/src/query/sql/src/executor/format.rs b/src/query/sql/src/executor/format.rs index 28401f54b03c2..0e18bb8e9309a 100644 --- a/src/query/sql/src/executor/format.rs +++ b/src/query/sql/src/executor/format.rs @@ -37,9 +37,7 @@ use crate::executor::physical_plans::ColumnMutation; use crate::executor::physical_plans::CommitSink; use crate::executor::physical_plans::ConstantTableScan; use crate::executor::physical_plans::CopyIntoLocation; -use crate::executor::physical_plans::CopyIntoTable; use crate::executor::physical_plans::CteScan; -use crate::executor::physical_plans::DistributedInsertSelect; use crate::executor::physical_plans::EvalScalar; use crate::executor::physical_plans::Exchange; use crate::executor::physical_plans::ExchangeSink; @@ -55,6 +53,7 @@ use crate::executor::physical_plans::MutationManipulate; use crate::executor::physical_plans::MutationOrganize; use crate::executor::physical_plans::MutationSource; use crate::executor::physical_plans::MutationSplit; +use crate::executor::physical_plans::PhysicalAppend; use crate::executor::physical_plans::ProjectSet; use crate::executor::physical_plans::RangeJoin; use crate::executor::physical_plans::RangeJoinType; @@ -362,16 +361,13 @@ fn to_format_tree( PhysicalPlan::UnionAll(plan) => union_all_to_format_tree(plan, metadata, profs), PhysicalPlan::ExchangeSource(plan) => exchange_source_to_format_tree(plan, metadata), PhysicalPlan::ExchangeSink(plan) => exchange_sink_to_format_tree(plan, metadata, profs), - PhysicalPlan::DistributedInsertSelect(plan) => { - distributed_insert_to_format_tree(plan.as_ref(), metadata, profs) - } PhysicalPlan::Recluster(_) => Ok(FormatTreeNode::new("Recluster".to_string())), PhysicalPlan::CompactSource(_) => Ok(FormatTreeNode::new("CompactSource".to_string())), PhysicalPlan::CommitSink(plan) => commit_sink_to_format_tree(plan, metadata, profs), PhysicalPlan::ProjectSet(plan) => project_set_to_format_tree(plan, metadata, profs), PhysicalPlan::Udf(plan) => 
udf_to_format_tree(plan, metadata, profs), PhysicalPlan::RangeJoin(plan) => range_join_to_format_tree(plan, metadata, profs), - PhysicalPlan::CopyIntoTable(plan) => copy_into_table(plan), + PhysicalPlan::Append(plan) => format_append(plan), PhysicalPlan::CopyIntoLocation(plan) => copy_into_location(plan), PhysicalPlan::ReplaceAsyncSourcer(_) => { Ok(FormatTreeNode::new("ReplaceAsyncSourcer".to_string())) @@ -693,11 +689,8 @@ fn format_add_stream_column( to_format_tree(&plan.input, metadata, profs) } -fn copy_into_table(plan: &CopyIntoTable) -> Result<FormatTreeNode<String>> { - Ok(FormatTreeNode::new(format!( - "CopyIntoTable: {}", - plan.table_info - ))) +fn format_append(plan: &PhysicalAppend) -> Result<FormatTreeNode<String>> { + Ok(FormatTreeNode::new(format!("Append: {}", plan.table_info))) } fn copy_into_location(_: &CopyIntoLocation) -> Result<FormatTreeNode<String>> { @@ -1689,19 +1682,6 @@ fn exchange_sink_to_format_tree( )) } -fn distributed_insert_to_format_tree( - plan: &DistributedInsertSelect, - metadata: &Metadata, - profs: &HashMap<u32, PlanProfile>, -) -> Result<FormatTreeNode<String>> { - let children = vec![to_format_tree(&plan.input, metadata, profs)?]; - - Ok(FormatTreeNode::with_children( - "DistributedInsertSelect".to_string(), - children, - )) -} - fn commit_sink_to_format_tree( plan: &CommitSink, metadata: &Metadata, diff --git a/src/query/sql/src/executor/physical_plan.rs b/src/query/sql/src/executor/physical_plan.rs index 1d97fd0d38d31..ff1f69b63b2c8 100644 --- a/src/query/sql/src/executor/physical_plan.rs +++ b/src/query/sql/src/executor/physical_plan.rs @@ -45,9 +45,7 @@ use crate::executor::physical_plans::CommitSink; use crate::executor::physical_plans::CompactSource; use crate::executor::physical_plans::ConstantTableScan; use crate::executor::physical_plans::CopyIntoLocation; -use crate::executor::physical_plans::CopyIntoTable; use crate::executor::physical_plans::CteScan; -use crate::executor::physical_plans::DistributedInsertSelect; use crate::executor::physical_plans::Duplicate; use crate::executor::physical_plans::EvalScalar; use crate::executor::physical_plans::Exchange; @@ -59,6 +57,7 @@ use crate::executor::physical_plans::HashJoin; use crate::executor::physical_plans::Limit; use crate::executor::physical_plans::MaterializedCte; use crate::executor::physical_plans::Mutation; +use crate::executor::physical_plans::PhysicalAppend; use crate::executor::physical_plans::ProjectSet; use crate::executor::physical_plans::RangeJoin; use crate::executor::physical_plans::Recluster; @@ -102,15 +101,12 @@ pub enum PhysicalPlan { Udf(Udf), RecursiveCteScan(RecursiveCteScan), - /// For insert into ... select ...
in cluster - DistributedInsertSelect(Box<DistributedInsertSelect>), - /// Synthesized by fragmented ExchangeSource(ExchangeSource), ExchangeSink(ExchangeSink), /// Copy into table - CopyIntoTable(Box<CopyIntoTable>), + Append(Box<PhysicalAppend>), ValueScan(Box<ValueScan>), CopyIntoLocation(Box<CopyIntoLocation>), @@ -274,11 +270,6 @@ impl PhysicalPlan { *next_id += 1; plan.input.adjust_plan_id(next_id); } - PhysicalPlan::DistributedInsertSelect(plan) => { - plan.plan_id = *next_id; - *next_id += 1; - plan.input.adjust_plan_id(next_id); - } PhysicalPlan::ExchangeSource(plan) => { plan.plan_id = *next_id; *next_id += 1; @@ -287,7 +278,7 @@ plan.plan_id = *next_id; *next_id += 1; } - PhysicalPlan::CopyIntoTable(plan) => { + PhysicalPlan::Append(plan) => { plan.plan_id = *next_id; *next_id += 1; plan.input.adjust_plan_id(next_id); @@ -430,7 +421,6 @@ impl PhysicalPlan { PhysicalPlan::RangeJoin(v) => v.plan_id, PhysicalPlan::Exchange(v) => v.plan_id, PhysicalPlan::UnionAll(v) => v.plan_id, - PhysicalPlan::DistributedInsertSelect(v) => v.plan_id, PhysicalPlan::ExchangeSource(v) => v.plan_id, PhysicalPlan::ExchangeSink(v) => v.plan_id, PhysicalPlan::CteScan(v) => v.plan_id, @@ -447,7 +437,7 @@ impl PhysicalPlan { PhysicalPlan::MutationOrganize(v) => v.plan_id, PhysicalPlan::AddStreamColumn(v) => v.plan_id, PhysicalPlan::CommitSink(v) => v.plan_id, - PhysicalPlan::CopyIntoTable(v) => v.plan_id, + PhysicalPlan::Append(v) => v.plan_id, PhysicalPlan::CopyIntoLocation(v) => v.plan_id, PhysicalPlan::ReplaceAsyncSourcer(v) => v.plan_id, PhysicalPlan::ReplaceDeduplicate(v) => v.plan_id, @@ -489,7 +479,7 @@ PhysicalPlan::UnionAll(plan) => plan.output_schema(), PhysicalPlan::ProjectSet(plan) => plan.output_schema(), PhysicalPlan::RangeJoin(plan) => plan.output_schema(), - PhysicalPlan::CopyIntoTable(plan) => plan.output_schema(), + PhysicalPlan::Append(plan) => plan.output_schema(), PhysicalPlan::CopyIntoLocation(plan) => plan.output_schema(), PhysicalPlan::CteScan(plan) => plan.output_schema(), PhysicalPlan::MaterializedCte(plan) => plan.output_schema(), @@ -510,7 +500,6 @@ | PhysicalPlan::ReplaceInto(_) | PhysicalPlan::CompactSource(_) | PhysicalPlan::CommitSink(_) - | PhysicalPlan::DistributedInsertSelect(_) | PhysicalPlan::Recluster(_) => Ok(DataSchemaRef::default()), PhysicalPlan::Duplicate(plan) => plan.input.output_schema(), PhysicalPlan::Shuffle(plan) => plan.input.output_schema(), @@ -548,14 +537,13 @@ PhysicalPlan::HashJoin(_) => "HashJoin".to_string(), PhysicalPlan::Exchange(_) => "Exchange".to_string(), PhysicalPlan::UnionAll(_) => "UnionAll".to_string(), - PhysicalPlan::DistributedInsertSelect(_) => "DistributedInsertSelect".to_string(), PhysicalPlan::ExchangeSource(_) => "Exchange Source".to_string(), PhysicalPlan::ExchangeSink(_) => "Exchange Sink".to_string(), PhysicalPlan::ProjectSet(_) => "Unnest".to_string(), PhysicalPlan::CompactSource(_) => "CompactBlock".to_string(), PhysicalPlan::CommitSink(_) => "CommitSink".to_string(), PhysicalPlan::RangeJoin(_) => "RangeJoin".to_string(), - PhysicalPlan::CopyIntoTable(_) => "CopyIntoTable".to_string(), + PhysicalPlan::Append(_) => "Append".to_string(), PhysicalPlan::CopyIntoLocation(_) => "CopyIntoLocation".to_string(), PhysicalPlan::ReplaceAsyncSourcer(_) => "ReplaceAsyncSourcer".to_string(), PhysicalPlan::ReplaceDeduplicate(_) => "ReplaceDeduplicate".to_string(), @@ -619,9 +607,6 @@ PhysicalPlan::UnionAll(plan) => Box::new( std::iter::once(plan.left.as_ref()).chain(std::iter::once(plan.right.as_ref())), ), -
PhysicalPlan::DistributedInsertSelect(plan) => { - Box::new(std::iter::once(plan.input.as_ref())) - } PhysicalPlan::CommitSink(plan) => Box::new(std::iter::once(plan.input.as_ref())), PhysicalPlan::ProjectSet(plan) => Box::new(std::iter::once(plan.input.as_ref())), PhysicalPlan::RangeJoin(plan) => Box::new( @@ -657,7 +642,7 @@ impl PhysicalPlan { PhysicalPlan::ChunkAppendData(plan) => Box::new(std::iter::once(plan.input.as_ref())), PhysicalPlan::ChunkMerge(plan) => Box::new(std::iter::once(plan.input.as_ref())), PhysicalPlan::ChunkCommitInsert(plan) => Box::new(std::iter::once(plan.input.as_ref())), - PhysicalPlan::CopyIntoTable(v) => Box::new(std::iter::once(v.input.as_ref())), + PhysicalPlan::Append(v) => Box::new(std::iter::once(v.input.as_ref())), } } @@ -673,7 +658,6 @@ impl PhysicalPlan { PhysicalPlan::Limit(plan) => plan.input.try_find_single_data_source(), PhysicalPlan::Exchange(plan) => plan.input.try_find_single_data_source(), PhysicalPlan::ExchangeSink(plan) => plan.input.try_find_single_data_source(), - PhysicalPlan::DistributedInsertSelect(plan) => plan.input.try_find_single_data_source(), PhysicalPlan::ProjectSet(plan) => plan.input.try_find_single_data_source(), PhysicalPlan::RowFetch(plan) => plan.input.try_find_single_data_source(), PhysicalPlan::Udf(plan) => plan.input.try_find_single_data_source(), @@ -689,7 +673,7 @@ impl PhysicalPlan { | PhysicalPlan::AggregatePartial(_) | PhysicalPlan::CompactSource(_) | PhysicalPlan::CommitSink(_) - | PhysicalPlan::CopyIntoTable(_) + | PhysicalPlan::Append(_) | PhysicalPlan::ReplaceAsyncSourcer(_) | PhysicalPlan::ReplaceDeduplicate(_) | PhysicalPlan::ReplaceInto(_) diff --git a/src/query/sql/src/executor/physical_plan_builder.rs b/src/query/sql/src/executor/physical_plan_builder.rs index 1e519da935538..920ef132bd9fe 100644 --- a/src/query/sql/src/executor/physical_plan_builder.rs +++ b/src/query/sql/src/executor/physical_plan_builder.rs @@ -142,7 +142,7 @@ impl PhysicalPlanBuilder { } RelOperator::Recluster(recluster) => self.build_recluster(recluster).await, RelOperator::CompactBlock(compact) => self.build_compact_block(compact).await, - RelOperator::CopyIntoTable(copy_into_table) => { + RelOperator::Append(copy_into_table) => { self.build_copy_into_table(s_expr, copy_into_table).await } RelOperator::ValueScan(value_scan) => self.build_value_scan(value_scan).await, diff --git a/src/query/sql/src/executor/physical_plan_visitor.rs b/src/query/sql/src/executor/physical_plan_visitor.rs index 2d5d9408e2d2d..eb41a6b3cafc8 100644 --- a/src/query/sql/src/executor/physical_plan_visitor.rs +++ b/src/query/sql/src/executor/physical_plan_visitor.rs @@ -39,9 +39,7 @@ use crate::executor::physical_plans::CommitSink; use crate::executor::physical_plans::CompactSource; use crate::executor::physical_plans::ConstantTableScan; use crate::executor::physical_plans::CopyIntoLocation; -use crate::executor::physical_plans::CopyIntoTable; use crate::executor::physical_plans::CteScan; -use crate::executor::physical_plans::DistributedInsertSelect; use crate::executor::physical_plans::Duplicate; use crate::executor::physical_plans::EvalScalar; use crate::executor::physical_plans::Exchange; @@ -53,6 +51,7 @@ use crate::executor::physical_plans::Limit; use crate::executor::physical_plans::MaterializedCte; use crate::executor::physical_plans::Mutation; use crate::executor::physical_plans::MutationSource; +use crate::executor::physical_plans::PhysicalAppend; use crate::executor::physical_plans::ProjectSet; use crate::executor::physical_plans::RangeJoin; use 
crate::executor::physical_plans::Recluster; @@ -89,12 +88,11 @@ pub trait PhysicalPlanReplacer { PhysicalPlan::ExchangeSource(plan) => self.replace_exchange_source(plan), PhysicalPlan::ExchangeSink(plan) => self.replace_exchange_sink(plan), PhysicalPlan::UnionAll(plan) => self.replace_union(plan), - PhysicalPlan::DistributedInsertSelect(plan) => self.replace_insert_select(plan), PhysicalPlan::ProjectSet(plan) => self.replace_project_set(plan), PhysicalPlan::CompactSource(plan) => self.replace_compact_source(plan), PhysicalPlan::CommitSink(plan) => self.replace_commit_sink(plan), PhysicalPlan::RangeJoin(plan) => self.replace_range_join(plan), - PhysicalPlan::CopyIntoTable(plan) => self.replace_copy_into_table(plan), + PhysicalPlan::Append(plan) => self.replace_copy_into_table(plan), PhysicalPlan::CopyIntoLocation(plan) => self.replace_copy_into_location(plan), PhysicalPlan::ReplaceAsyncSourcer(plan) => self.replace_async_sourcer(plan), PhysicalPlan::ReplaceDeduplicate(plan) => self.replace_deduplicate(plan), @@ -404,10 +402,10 @@ pub trait PhysicalPlanReplacer { })) } - fn replace_copy_into_table(&mut self, plan: &CopyIntoTable) -> Result<PhysicalPlan> { + fn replace_copy_into_table(&mut self, plan: &PhysicalAppend) -> Result<PhysicalPlan> { let input = self.replace(&plan.input)?; - Ok(PhysicalPlan::CopyIntoTable(Box::new(CopyIntoTable { + Ok(PhysicalPlan::Append(Box::new(PhysicalAppend { plan_id: plan.plan_id, input: Box::new(input), ..plan.clone() @@ -426,22 +424,6 @@ pub trait PhysicalPlanReplacer { }))) } - fn replace_insert_select(&mut self, plan: &DistributedInsertSelect) -> Result<PhysicalPlan> { - let input = self.replace(&plan.input)?; - - Ok(PhysicalPlan::DistributedInsertSelect(Box::new( - DistributedInsertSelect { - plan_id: plan.plan_id, - input: Box::new(input), - table_info: plan.table_info.clone(), - select_schema: plan.select_schema.clone(), - insert_schema: plan.insert_schema.clone(), - select_column_bindings: plan.select_column_bindings.clone(), - cast_needed: plan.cast_needed, - }, - ))) - } - fn replace_compact_source(&mut self, plan: &CompactSource) -> Result<PhysicalPlan> { Ok(PhysicalPlan::CompactSource(Box::new(plan.clone()))) } @@ -710,13 +692,10 @@ impl PhysicalPlan { Self::traverse(&plan.left, pre_visit, visit, post_visit); Self::traverse(&plan.right, pre_visit, visit, post_visit); } - PhysicalPlan::DistributedInsertSelect(plan) => { - Self::traverse(&plan.input, pre_visit, visit, post_visit); - } PhysicalPlan::ProjectSet(plan) => { Self::traverse(&plan.input, pre_visit, visit, post_visit) } - PhysicalPlan::CopyIntoTable(plan) => { + PhysicalPlan::Append(plan) => { Self::traverse(&plan.input, pre_visit, visit, post_visit) } PhysicalPlan::CopyIntoLocation(plan) => { diff --git a/src/query/sql/src/executor/physical_plans/common.rs b/src/query/sql/src/executor/physical_plans/common.rs index 8ea71625bd1e6..a50873a13400e 100644 --- a/src/query/sql/src/executor/physical_plans/common.rs +++ b/src/query/sql/src/executor/physical_plans/common.rs @@ -78,7 +78,6 @@ pub enum MutationKind { Replace, Recluster, Insert, - CopyInto, Compact, MergeInto, } @@ -93,7 +92,6 @@ impl Display for MutationKind { MutationKind::Replace => write!(f, "Replace"), MutationKind::Compact => write!(f, "Compact"), MutationKind::MergeInto => write!(f, "MergeInto"), - MutationKind::CopyInto => write!(f, "CopyInto"), } } } diff --git a/src/query/sql/src/executor/physical_plans/mod.rs b/src/query/sql/src/executor/physical_plans/mod.rs index 1cbbf1892fa06..9e22ab87e5ac5 100644 --- a/src/query/sql/src/executor/physical_plans/mod.rs +++
b/src/query/sql/src/executor/physical_plans/mod.rs @@ -17,6 +17,7 @@ mod physical_add_stream_column; mod physical_aggregate_expand; mod physical_aggregate_final; mod physical_aggregate_partial; +mod physical_append; mod physical_async_func; mod physical_cache_scan; mod physical_column_mutation; @@ -24,9 +25,7 @@ mod physical_commit_sink; mod physical_compact_source; mod physical_constant_table_scan; mod physical_copy_into_location; -mod physical_copy_into_table; mod physical_cte_scan; -mod physical_distributed_insert_select; mod physical_eval_scalar; mod physical_exchange; mod physical_exchange_sink; @@ -65,6 +64,7 @@ pub use physical_add_stream_column::AddStreamColumn; pub use physical_aggregate_expand::AggregateExpand; pub use physical_aggregate_final::AggregateFinal; pub use physical_aggregate_partial::AggregatePartial; +pub use physical_append::*; pub use physical_async_func::AsyncFunction; pub use physical_async_func::AsyncFunctionDesc; pub use physical_cache_scan::CacheScan; @@ -74,9 +74,7 @@ pub use physical_commit_sink::ReclusterInfoSideCar; pub use physical_compact_source::CompactSource; pub use physical_constant_table_scan::ConstantTableScan; pub use physical_copy_into_location::CopyIntoLocation; -pub use physical_copy_into_table::*; pub use physical_cte_scan::CteScan; -pub use physical_distributed_insert_select::DistributedInsertSelect; pub use physical_eval_scalar::EvalScalar; pub use physical_exchange::Exchange; pub use physical_exchange_sink::ExchangeSink; diff --git a/src/query/sql/src/executor/physical_plans/physical_copy_into_table.rs b/src/query/sql/src/executor/physical_plans/physical_append.rs similarity index 96% rename from src/query/sql/src/executor/physical_plans/physical_copy_into_table.rs rename to src/query/sql/src/executor/physical_plans/physical_append.rs index 0e10c7fb01459..bda5a68c4f203 100644 --- a/src/query/sql/src/executor/physical_plans/physical_copy_into_table.rs +++ b/src/query/sql/src/executor/physical_plans/physical_append.rs @@ -22,7 +22,7 @@ use crate::executor::physical_plan::PhysicalPlan; use crate::ColumnBinding; #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] -pub struct CopyIntoTable { +pub struct PhysicalAppend { pub plan_id: u32, pub input: Box<PhysicalPlan>, @@ -33,7 +33,7 @@ pub struct CopyIntoTable { pub project_columns: Option<Vec<ColumnBinding>>, } -impl CopyIntoTable { +impl PhysicalAppend { pub fn output_schema(&self) -> Result<DataSchemaRef> { Ok(DataSchemaRefExt::create(vec![])) } } diff --git a/src/query/sql/src/executor/physical_plans/physical_distributed_insert_select.rs b/src/query/sql/src/executor/physical_plans/physical_distributed_insert_select.rs deleted file mode 100644 index abe0def948dbe..0000000000000 --- a/src/query/sql/src/executor/physical_plans/physical_distributed_insert_select.rs +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License.
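Both COPY INTO and INSERT now lower to this single physical node. A hypothetical construction, restricted to the fields visible in this patch (`PhysicalPlanBuilder::build_copy_into_table`, later in this series, fills them the same way):

    // Sketch only, assuming `input_plan`, the two schemas and `table_info`
    // are already in scope.
    let node = PhysicalPlan::Append(Box::new(PhysicalAppend {
        plan_id: 0,                  // reassigned later by adjust_plan_id()
        input: Box::new(input_plan), // stage TableScan or a SELECT subplan
        required_values_schema,
        values_consts: vec![],
        required_source_schema,
        table_info,                  // resolved target table
        project_columns: None,       // Some(bindings) for INSERT ... SELECT
    }));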
- -use databend_common_expression::DataSchemaRef; -use databend_common_meta_app::schema::TableInfo; - -use crate::executor::PhysicalPlan; -use crate::ColumnBinding; - -#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] -pub struct DistributedInsertSelect { - /// A unique id of operator in a `PhysicalPlan` tree. - pub plan_id: u32, - - pub input: Box, - pub table_info: TableInfo, - pub insert_schema: DataSchemaRef, - pub select_schema: DataSchemaRef, - pub select_column_bindings: Vec, - pub cast_needed: bool, -} diff --git a/src/query/sql/src/planner/binder/copy_into_table.rs b/src/query/sql/src/planner/binder/copy_into_table.rs index c603ef58620e4..caa5c597a094b 100644 --- a/src/query/sql/src/planner/binder/copy_into_table.rs +++ b/src/query/sql/src/planner/binder/copy_into_table.rs @@ -64,9 +64,9 @@ use parking_lot::RwLock; use crate::binder::bind_query::MaxColumnPosition; use crate::binder::location::parse_uri_location; use crate::binder::Binder; -use crate::executor::physical_plans::MutationKind; use crate::optimizer::SExpr; -use crate::plans::CopyIntoTablePlan; +use crate::plans::Append; +use crate::plans::AppendType; use crate::plans::Plan; use crate::BindContext; use crate::Metadata; @@ -80,13 +80,24 @@ impl<'a> Binder { bind_context: &mut BindContext, stmt: &CopyIntoTableStmt, ) -> Result { + let (catalog_name, database_name, table_name) = self.normalize_object_identifier_triple( + &stmt.dst.catalog, + &stmt.dst.database, + &stmt.dst.table, + ); match &stmt.src { CopyIntoTableSource::Location(location) => { let (copy_into_table_plan, mut stage_table_info) = self .bind_copy_into_table_common(bind_context, stmt, location) .await?; copy_into_table_plan - .collect_files(self.ctx.as_ref(), &mut stage_table_info) + .collect_files( + self.ctx.as_ref(), + &mut stage_table_info, + &catalog_name, + &database_name, + &table_name, + ) .await?; self.bind_copy_into_table_from_location( bind_context, @@ -109,7 +120,13 @@ impl<'a> Binder { .await?; copy_into_table_plan - .collect_files(self.ctx.as_ref(), &mut stage_table_info) + .collect_files( + self.ctx.as_ref(), + &mut stage_table_info, + &catalog_name, + &database_name, + &table_name, + ) .await?; self.bind_copy_from_query_into_table( bind_context, @@ -148,7 +165,7 @@ impl<'a> Binder { bind_context: &mut BindContext, stmt: &CopyIntoTableStmt, location: &FileLocation, - ) -> Result<(CopyIntoTablePlan, StageTableInfo)> { + ) -> Result<(Append, StageTableInfo)> { let (catalog_name, database_name, table_name) = self.normalize_object_identifier_triple( &stmt.dst.catalog, &stmt.dst.database, @@ -159,6 +176,17 @@ impl<'a> Binder { .get_table(&catalog_name, &database_name, &table_name) .await?; + let table_index = self.metadata.write().add_table( + catalog_name, + database_name, + table.clone(), + None, + false, + false, + false, + false, + ); + let (mut stage_info, path) = resolve_file_location(self.ctx.as_ref(), location).await?; if !stmt.file_format.is_empty() { stage_info.file_format_params = self.try_resolve_file_format(&stmt.file_format).await?; @@ -200,6 +228,7 @@ impl<'a> Binder { } else { None }; + let stage_table_info = StageTableInfo { schema: stage_schema, files_info, @@ -211,15 +240,14 @@ impl<'a> Binder { copy_into_location_options: Default::default(), copy_into_table_options: stmt.options.clone(), }; - let copy_into_plan = CopyIntoTablePlan { - catalog_name, - database_name, - table_name, + + let copy_into_plan = Append { + table_index, values_consts: vec![], required_source_schema: required_values_schema.clone(), 
required_values_schema: required_values_schema.clone(), project_columns: None, - mutation_kind: MutationKind::CopyInto, + append_type: AppendType::CopyInto, }; Ok((copy_into_plan, stage_table_info)) } @@ -229,7 +257,7 @@ impl<'a> Binder { async fn bind_copy_into_table_from_location( &mut self, bind_ctx: &BindContext, - copy_into_table_plan: CopyIntoTablePlan, + copy_into_table_plan: Append, stage_table_info: StageTableInfo, ) -> Result<Plan> { let use_query = matches!(&stage_table_info.stage_info.file_format_params, @@ -289,11 +317,12 @@ impl<'a> Binder { let copy_into = SExpr::create_unary(Arc::new(copy_into_table_plan.into()), Arc::new(scan)); - Ok(Plan::CopyIntoTable { + Ok(Plan::Append { s_expr: Box::new(copy_into), metadata: self.metadata.clone(), stage_table_info: Some(Box::new(stage_table_info)), overwrite: false, + forbid_occ_retry: false, }) } } @@ -378,6 +407,22 @@ impl<'a> Binder { .prepare_default_values(bind_context, &data_schema) .await?; + let table = self + .ctx + .get_table(&catalog_name, &database_name, &table_name) + .await?; + + let table_index = self.metadata.write().add_table( + catalog_name, + database_name, + table, + None, + false, + false, + false, + false, + ); + let stage_table_info = StageTableInfo { schema: stage_schema, files_info, @@ -390,14 +435,12 @@ impl<'a> Binder { copy_into_table_options: options, }; - let copy_into_table_plan = CopyIntoTablePlan { - catalog_name, - database_name, - table_name, + let copy_into_table_plan = Append { + table_index, required_values_schema, values_consts: const_columns, required_source_schema: data_schema.clone(), - mutation_kind: MutationKind::Insert, + append_type: AppendType::Insert, project_columns: None, }; @@ -414,7 +457,7 @@ impl<'a> Binder { async fn bind_copy_from_query_into_table( &mut self, bind_context: &BindContext, - mut copy_into_table_plan: CopyIntoTablePlan, + mut copy_into_table_plan: Append, stage_table_info: StageTableInfo, select_list: &'a [SelectTarget], alias: &Option<TableAlias>, ) -> Result<Plan> { @@ -494,11 +537,12 @@ impl<'a> Binder { let copy_into = SExpr::create_unary(Arc::new(copy_into_table_plan.into()), Arc::new(s_expr)); - Ok(Plan::CopyIntoTable { + Ok(Plan::Append { s_expr: Box::new(copy_into), metadata: self.metadata.clone(), stage_table_info: Some(Box::new(stage_table_info)), overwrite: false, + forbid_occ_retry: false, }) } diff --git a/src/query/sql/src/planner/binder/insert.rs b/src/query/sql/src/planner/binder/insert.rs index d2ae1fac7c203..65e85ad05df9a 100644 --- a/src/query/sql/src/planner/binder/insert.rs +++ b/src/query/sql/src/planner/binder/insert.rs @@ -25,11 +25,11 @@ use databend_common_expression::TableSchemaRefExt; use super::util::TableIdentifier; use crate::binder::Binder; -use crate::executor::physical_plans::MutationKind; use crate::executor::physical_plans::Values; use crate::normalize_identifier; use crate::optimizer::SExpr; -use crate::plans::CopyIntoTablePlan; +use crate::plans::Append; +use crate::plans::AppendType; use crate::plans::Plan; use crate::plans::ValueScan; use crate::BindContext; @@ -100,6 +100,17 @@ impl Binder { .await .map_err(|err| table_identifier.not_found_suggest_error(err))?; + let table_index = self.metadata.write().add_table( + catalog_name.clone(), + database_name.clone(), + table.clone(), + None, + false, + false, + false, + false, + ); + let schema = self.schema_project(&table.schema(), columns)?; let schema: DataSchemaRef = Arc::new(schema.into()); @@ -166,18 +177,16 @@ impl Binder { } }; -
let copy_into = Append { + table_index, required_values_schema: schema.clone(), values_consts: vec![], required_source_schema: schema, - mutation_kind: MutationKind::Insert, + append_type: AppendType::Insert, project_columns, }; - Ok(Plan::CopyIntoTable { + Ok(Plan::Append { s_expr: Box::new(SExpr::create_unary( Arc::new(copy_into.into()), Arc::new(source), @@ -185,6 +194,7 @@ impl Binder { metadata: self.metadata.clone(), stage_table_info: None, overwrite: *overwrite, + forbid_occ_retry: false, }) } } diff --git a/src/query/sql/src/planner/binder/util.rs b/src/query/sql/src/planner/binder/util.rs index 2504c405cd029..ee86f261b8b78 100644 --- a/src/query/sql/src/planner/binder/util.rs +++ b/src/query/sql/src/planner/binder/util.rs @@ -88,7 +88,7 @@ impl Binder { | RelOperator::Mutation(_) | RelOperator::Recluster(_) | RelOperator::MutationSource(_) - | RelOperator::CopyIntoTable(_) + | RelOperator::Append(_) | RelOperator::ValueScan(_) | RelOperator::CompactBlock(_) => { return Err(ErrorCode::SyntaxException(format!( diff --git a/src/query/sql/src/planner/format/display_plan.rs b/src/query/sql/src/planner/format/display_plan.rs index cb50d4e3491b3..539a243abdd3f 100644 --- a/src/query/sql/src/planner/format/display_plan.rs +++ b/src/query/sql/src/planner/format/display_plan.rs @@ -40,7 +40,7 @@ impl Plan { Plan::ExplainSyntax { .. } => Ok("ExplainSyntax".to_string()), Plan::ExplainAnalyze { .. } => Ok("ExplainAnalyze".to_string()), - Plan::CopyIntoTable { .. } => Ok("CopyIntoTable".to_string()), + Plan::Append { .. } => Ok("Append".to_string()), Plan::CopyIntoLocation(_) => Ok("CopyIntoLocation".to_string()), // catalog diff --git a/src/query/sql/src/planner/optimizer/decorrelate/subquery_rewriter.rs b/src/query/sql/src/planner/optimizer/decorrelate/subquery_rewriter.rs index 3446dfa785309..71ef006caaf97 100644 --- a/src/query/sql/src/planner/optimizer/decorrelate/subquery_rewriter.rs +++ b/src/query/sql/src/planner/optimizer/decorrelate/subquery_rewriter.rs @@ -200,7 +200,7 @@ impl SubqueryRewriter { | RelOperator::Mutation(_) | RelOperator::MutationSource(_) | RelOperator::Recluster(_) - | RelOperator::CopyIntoTable(_) + | RelOperator::Append(_) | RelOperator::ValueScan(_) | RelOperator::CompactBlock(_) => Ok(s_expr.clone()), } diff --git a/src/query/sql/src/planner/optimizer/dynamic_sample/dynamic_sample.rs b/src/query/sql/src/planner/optimizer/dynamic_sample/dynamic_sample.rs index 38600bb26f2f8..324b9481988df 100644 --- a/src/query/sql/src/planner/optimizer/dynamic_sample/dynamic_sample.rs +++ b/src/query/sql/src/planner/optimizer/dynamic_sample/dynamic_sample.rs @@ -94,7 +94,7 @@ pub async fn dynamic_sample( | RelOperator::Mutation(_) | RelOperator::Recluster(_) | RelOperator::CompactBlock(_) - | RelOperator::CopyIntoTable(_) + | RelOperator::Append(_) | RelOperator::ValueScan(_) | RelOperator::MutationSource(_) => { s_expr.plan().derive_stats(&RelExpr::with_s_expr(s_expr)) diff --git a/src/query/sql/src/planner/optimizer/format.rs b/src/query/sql/src/planner/optimizer/format.rs index d3705365f16b6..16304ecedbdc0 100644 --- a/src/query/sql/src/planner/optimizer/format.rs +++ b/src/query/sql/src/planner/optimizer/format.rs @@ -77,7 +77,7 @@ pub fn display_rel_op(rel_op: &RelOperator) -> String { RelOperator::MutationSource(_) => "MutationSource".to_string(), RelOperator::Recluster(_) => "Recluster".to_string(), RelOperator::CompactBlock(_) => "CompactBlock".to_string(), - RelOperator::CopyIntoTable(_) => "CopyIntoTable".to_string(), + RelOperator::Append(_) => "Append".to_string(), 
RelOperator::ValueScan(_) => "ValueScan".to_string(), } } diff --git a/src/query/sql/src/planner/optimizer/hyper_dp/dphyp.rs b/src/query/sql/src/planner/optimizer/hyper_dp/dphyp.rs index ca39131bc94da..6cfff12823fa9 100644 --- a/src/query/sql/src/planner/optimizer/hyper_dp/dphyp.rs +++ b/src/query/sql/src/planner/optimizer/hyper_dp/dphyp.rs @@ -300,7 +300,7 @@ impl DPhpy { | RelOperator::MutationSource(_) | RelOperator::Recluster(_) | RelOperator::CompactBlock(_) - | RelOperator::CopyIntoTable(_) + | RelOperator::Append(_) | RelOperator::ValueScan(_) => Ok((Arc::new(s_expr.clone()), true)), } } diff --git a/src/query/sql/src/planner/optimizer/optimizer.rs b/src/query/sql/src/planner/optimizer/optimizer.rs index 85378b373b000..18f8e9ec39ae5 100644 --- a/src/query/sql/src/planner/optimizer/optimizer.rs +++ b/src/query/sql/src/planner/optimizer/optimizer.rs @@ -287,11 +287,12 @@ pub async fn optimize(mut opt_ctx: OptimizerContext, plan: Plan) -> Result from: Box::new(Box::pin(optimize(opt_ctx, *from)).await?), options, })), - Plan::CopyIntoTable { + Plan::Append { s_expr, metadata, stage_table_info, overwrite, + forbid_occ_retry, } => { let enable_distributed = opt_ctx.enable_distributed_optimization && opt_ctx @@ -320,11 +321,12 @@ pub async fn optimize(mut opt_ctx: OptimizerContext, plan: Plan) -> Result SExpr::create_unary(Arc::new(s_expr.plan().clone()), Arc::new(optimized_source)) } }; - Ok(Plan::CopyIntoTable { + Ok(Plan::Append { s_expr: Box::new(optimized), metadata, stage_table_info, overwrite, + forbid_occ_retry, }) } Plan::DataMutation { s_expr, .. } => optimize_mutation(opt_ctx, *s_expr).await, diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_semi_to_inner_join.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_semi_to_inner_join.rs index a0c8420652ee1..bc53ff25c6ef6 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_semi_to_inner_join.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_semi_to_inner_join.rs @@ -143,7 +143,7 @@ fn find_group_by_keys(child: &SExpr, group_by_keys: &mut HashSet) -> | RelOperator::MutationSource(_) | RelOperator::Recluster(_) | RelOperator::CompactBlock(_) - | RelOperator::CopyIntoTable(_) + | RelOperator::Append(_) | RelOperator::ValueScan(_) => {} } Ok(()) diff --git a/src/query/sql/src/planner/optimizer/s_expr.rs b/src/query/sql/src/planner/optimizer/s_expr.rs index 6d42d8ba818e0..0e54a0dfb6908 100644 --- a/src/query/sql/src/planner/optimizer/s_expr.rs +++ b/src/query/sql/src/planner/optimizer/s_expr.rs @@ -340,7 +340,7 @@ impl SExpr { | RelOperator::RecursiveCteScan(_) | RelOperator::Mutation(_) | RelOperator::Recluster(_) - | RelOperator::CopyIntoTable(_) + | RelOperator::Append(_) | RelOperator::ValueScan(_) | RelOperator::CompactBlock(_) => {} }; @@ -443,7 +443,7 @@ fn find_subquery(rel_op: &RelOperator) -> bool { | RelOperator::RecursiveCteScan(_) | RelOperator::Mutation(_) | RelOperator::Recluster(_) - | RelOperator::CopyIntoTable(_) + | RelOperator::Append(_) | RelOperator::ValueScan(_) | RelOperator::CompactBlock(_) => false, RelOperator::Join(op) => { diff --git a/src/query/sql/src/planner/plans/copy_into_table.rs b/src/query/sql/src/planner/plans/append.rs similarity index 72% rename from src/query/sql/src/planner/plans/copy_into_table.rs rename to src/query/sql/src/planner/plans/append.rs index 03dbd8825e563..4ec397720416a 100644 --- a/src/query/sql/src/planner/plans/copy_into_table.rs +++ b/src/query/sql/src/planner/plans/append.rs @@ -19,6 +19,7 @@ use std::sync::Arc; use 
std::time::Instant; use databend_common_catalog::plan::StageTableInfo; +use databend_common_catalog::table::Table; use databend_common_catalog::table_context::FilteredCopyFiles; use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; @@ -37,39 +38,106 @@ use databend_common_storage::init_stage_operator; use log::info; use super::Operator; +use super::Plan; use super::RelOp; -use crate::executor::physical_plans::CopyIntoTable; -use crate::executor::physical_plans::MutationKind; +use crate::executor::physical_plans::PhysicalAppend; use crate::executor::PhysicalPlan; use crate::executor::PhysicalPlanBuilder; +use crate::optimizer::optimize; +use crate::optimizer::OptimizerContext; use crate::optimizer::SExpr; use crate::ColumnBinding; +use crate::IndexType; #[derive(Clone, PartialEq, Eq)] -pub struct CopyIntoTablePlan { - pub catalog_name: String, - pub database_name: String, - pub table_name: String, +pub struct Append { + // Use a table index instead of catalog_name/database_name/table_name: once the logical plan is built, + // the target table is determined and we won't call get_table() again. + pub table_index: IndexType, pub required_values_schema: DataSchemaRef, pub values_consts: Vec<Scalar>, pub required_source_schema: DataSchemaRef, pub project_columns: Option<Vec<ColumnBinding>>, - pub mutation_kind: MutationKind, + pub append_type: AppendType, } -impl Hash for CopyIntoTablePlan { - fn hash<H: Hasher>(&self, state: &mut H) { - self.catalog_name.hash(state); - self.database_name.hash(state); - self.table_name.hash(state); +#[derive(Clone, PartialEq, Eq)] +pub enum AppendType { + Insert, + CopyInto, +} + +pub async fn create_append_plan_from_subquery( + subquery: &Plan, + catalog_name: String, + database_name: String, + table: Arc<dyn Table>, + target_schema: DataSchemaRef, + forbid_occ_retry: bool, + ctx: Arc<dyn TableContext>, +) -> Result<Plan> { + let (project_columns, source, metadata) = match subquery { + Plan::Query { + bind_context, + s_expr, + metadata, + .. + } => ( + Some(bind_context.columns.clone()), + *s_expr.clone(), + metadata.clone(), + ), + _ => unreachable!(), + }; + + let table_index = metadata.write().add_table( + catalog_name, + database_name, + table, + None, + false, + false, + false, + false, + ); + + let insert_plan = Append { + table_index, + required_values_schema: target_schema.clone(), + values_consts: vec![], + required_source_schema: target_schema, + project_columns, + append_type: AppendType::Insert, + }; + + let s_expr = SExpr::create_unary(Arc::new(insert_plan.into()), Arc::new(source)); + let plan = Plan::Append { + s_expr: Box::new(s_expr), + metadata: metadata.clone(), + stage_table_info: None, + overwrite: false, + forbid_occ_retry, + }; + let opt_ctx = OptimizerContext::new(ctx.clone(), metadata) + .with_enable_distributed_optimization(!ctx.get_cluster().is_empty()); + + optimize(opt_ctx, plan).await +} + +impl Hash for Append { + fn hash<H: Hasher>(&self, _state: &mut H) { + todo!() } } -impl CopyIntoTablePlan { +impl Append { pub async fn collect_files( &self, ctx: &dyn TableContext, stage_table_info: &mut StageTableInfo, + catalog_name: &str, + database_name: &str, + table_name: &str, ) -> Result<()> { ctx.set_status_info("begin to list files"); let start = Instant::now(); @@ -121,8 +189,8 @@ impl CopyIntoTablePlan { return Err(ErrorCode::Internal(COPY_MAX_FILES_COMMIT_MSG)); } info!( - "force mode, ignore file filtering.
({}.{})", - &self.database_name, &self.table_name + "force mode, ignore file filtering {}.{}.{}", + catalog_name, database_name, table_name ); (all_source_file_infos, vec![]) } else { @@ -135,9 +203,9 @@ impl CopyIntoTablePlan { duplicated_files, } = ctx .filter_out_copied_files( - &self.catalog_name, - &self.database_name, - &self.table_name, + catalog_name, + database_name, + table_name, &all_source_file_infos, max_files, ) @@ -176,7 +244,7 @@ impl CopyIntoTablePlan { } } -impl Debug for CopyIntoTablePlan { +impl Debug for Append { fn fmt(&self, _f: &mut Formatter) -> std::fmt::Result { // let CopyIntoTablePlan { // catalog_info, @@ -201,7 +269,7 @@ impl Debug for CopyIntoTablePlan { } } -impl CopyIntoTablePlan { +impl Append { fn copy_into_table_schema() -> DataSchemaRef { DataSchemaRefExt::create(vec![ DataField::new("File", DataType::String), @@ -219,17 +287,16 @@ impl CopyIntoTablePlan { } pub fn schema(&self) -> DataSchemaRef { - match self.mutation_kind { - MutationKind::CopyInto => Self::copy_into_table_schema(), - MutationKind::Insert => Arc::new(DataSchema::empty()), - _ => unreachable!(), + match self.append_type { + AppendType::CopyInto => Self::copy_into_table_schema(), + AppendType::Insert => Arc::new(DataSchema::empty()), } } } -impl Operator for CopyIntoTablePlan { +impl Operator for Append { fn rel_op(&self) -> RelOp { - RelOp::CopyIntoTable + RelOp::Append } } @@ -237,22 +304,19 @@ impl PhysicalPlanBuilder { pub async fn build_copy_into_table( &mut self, s_expr: &SExpr, - plan: &crate::plans::CopyIntoTablePlan, + plan: &crate::plans::Append, ) -> Result { - let to_table = self - .ctx - .get_table(&plan.catalog_name, &plan.database_name, &plan.table_name) - .await?; + let target_table = self.metadata.read().table(plan.table_index).table(); let source = self.build(s_expr.child(0)?, Default::default()).await?; - Ok(PhysicalPlan::CopyIntoTable(Box::new(CopyIntoTable { + Ok(PhysicalPlan::Append(Box::new(PhysicalAppend { plan_id: 0, input: Box::new(source), required_values_schema: plan.required_values_schema.clone(), values_consts: plan.values_consts.clone(), required_source_schema: plan.required_source_schema.clone(), - table_info: to_table.get_table_info().clone(), + table_info: target_table.get_table_info().clone(), project_columns: None, }))) } diff --git a/src/query/sql/src/planner/plans/mod.rs b/src/query/sql/src/planner/plans/mod.rs index 4ae14cfb9bce1..d0f2c779a2b2d 100644 --- a/src/query/sql/src/planner/plans/mod.rs +++ b/src/query/sql/src/planner/plans/mod.rs @@ -13,12 +13,12 @@ // limitations under the License. 
mod aggregate; +mod append; mod async_function; mod cache_scan; mod call; mod constant_table_scan; mod copy_into_location; -mod copy_into_table; mod cte_scan; mod data_mask; mod ddl; @@ -56,12 +56,12 @@ mod value_scan; mod window; pub use aggregate::*; +pub use append::*; pub use async_function::AsyncFunction; pub use cache_scan::*; pub use call::CallPlan; pub use constant_table_scan::ConstantTableScan; pub use copy_into_location::*; -pub use copy_into_table::*; pub use cte_scan::CteScan; pub use data_mask::*; pub use ddl::*; diff --git a/src/query/sql/src/planner/plans/operator.rs b/src/query/sql/src/planner/plans/operator.rs index 15d5a2ed1c52a..081af0c4e9c99 100644 --- a/src/query/sql/src/planner/plans/operator.rs +++ b/src/query/sql/src/planner/plans/operator.rs @@ -18,7 +18,7 @@ use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; use databend_common_exception::Result; -use super::CopyIntoTablePlan; +use super::Append; use super::MutationSource; use super::ValueScan; use crate::optimizer::PhysicalProperty; @@ -123,7 +123,7 @@ pub enum RelOp { Recluster, CompactBlock, MutationSource, - CopyIntoTable, + Append, ValueScan, // Pattern @@ -157,7 +157,7 @@ pub enum RelOperator { Recluster(Recluster), CompactBlock(OptimizeCompactBlock), MutationSource(MutationSource), - CopyIntoTable(CopyIntoTablePlan), + Append(Append), ValueScan(ValueScan), } @@ -188,7 +188,7 @@ impl Operator for RelOperator { RelOperator::Recluster(rel_op) => rel_op.rel_op(), RelOperator::CompactBlock(rel_op) => rel_op.rel_op(), RelOperator::MutationSource(rel_op) => rel_op.rel_op(), - RelOperator::CopyIntoTable(rel_op) => rel_op.rel_op(), + RelOperator::Append(rel_op) => rel_op.rel_op(), RelOperator::ValueScan(rel_op) => rel_op.rel_op(), } } @@ -219,7 +219,7 @@ impl Operator for RelOperator { RelOperator::Recluster(rel_op) => rel_op.arity(), RelOperator::CompactBlock(rel_op) => rel_op.arity(), RelOperator::MutationSource(rel_op) => rel_op.arity(), - RelOperator::CopyIntoTable(rel_op) => rel_op.arity(), + RelOperator::Append(rel_op) => rel_op.arity(), RelOperator::ValueScan(rel_op) => rel_op.arity(), } } @@ -250,7 +250,7 @@ impl Operator for RelOperator { RelOperator::Recluster(rel_op) => rel_op.derive_relational_prop(rel_expr), RelOperator::CompactBlock(rel_op) => rel_op.derive_relational_prop(rel_expr), RelOperator::MutationSource(rel_op) => rel_op.derive_relational_prop(rel_expr), - RelOperator::CopyIntoTable(rel_op) => rel_op.derive_relational_prop(rel_expr), + RelOperator::Append(rel_op) => rel_op.derive_relational_prop(rel_expr), RelOperator::ValueScan(rel_op) => rel_op.derive_relational_prop(rel_expr), } } @@ -281,7 +281,7 @@ impl Operator for RelOperator { RelOperator::Recluster(rel_op) => rel_op.derive_physical_prop(rel_expr), RelOperator::CompactBlock(rel_op) => rel_op.derive_physical_prop(rel_expr), RelOperator::MutationSource(rel_op) => rel_op.derive_physical_prop(rel_expr), - RelOperator::CopyIntoTable(rel_op) => rel_op.derive_physical_prop(rel_expr), + RelOperator::Append(rel_op) => rel_op.derive_physical_prop(rel_expr), RelOperator::ValueScan(rel_op) => rel_op.derive_physical_prop(rel_expr), } } @@ -312,7 +312,7 @@ impl Operator for RelOperator { RelOperator::Recluster(rel_op) => rel_op.derive_stats(rel_expr), RelOperator::CompactBlock(rel_op) => rel_op.derive_stats(rel_expr), RelOperator::MutationSource(rel_op) => rel_op.derive_stats(rel_expr), - RelOperator::CopyIntoTable(rel_op) => rel_op.derive_stats(rel_expr), + RelOperator::Append(rel_op) => 
rel_op.derive_stats(rel_expr), RelOperator::ValueScan(rel_op) => rel_op.derive_stats(rel_expr), } } @@ -397,7 +397,7 @@ impl Operator for RelOperator { RelOperator::MutationSource(rel_op) => { rel_op.compute_required_prop_child(ctx, rel_expr, child_index, required) } - RelOperator::CopyIntoTable(rel_op) => { + RelOperator::Append(rel_op) => { rel_op.compute_required_prop_child(ctx, rel_expr, child_index, required) } RelOperator::ValueScan(rel_op) => { @@ -485,7 +485,7 @@ impl Operator for RelOperator { RelOperator::MutationSource(rel_op) => { rel_op.compute_required_prop_children(ctx, rel_expr, required) } - RelOperator::CopyIntoTable(rel_op) => { + RelOperator::Append(rel_op) => { rel_op.compute_required_prop_children(ctx, rel_expr, required) } RelOperator::ValueScan(rel_op) => { @@ -942,20 +942,20 @@ impl TryFrom<RelOperator> for MutationSource { } } -impl From<CopyIntoTablePlan> for RelOperator { - fn from(v: CopyIntoTablePlan) -> Self { - Self::CopyIntoTable(v) +impl From<Append> for RelOperator { + fn from(v: Append) -> Self { + Self::Append(v) } } -impl TryFrom<RelOperator> for CopyIntoTablePlan { +impl TryFrom<RelOperator> for Append { type Error = ErrorCode; fn try_from(value: RelOperator) -> Result<Self> { - if let RelOperator::CopyIntoTable(value) = value { + if let RelOperator::Append(value) = value { Ok(value) } else { Err(ErrorCode::Internal(format!( - "Cannot downcast {:?} to CopyIntoTable", + "Cannot downcast {:?} to Append", value.rel_op() ))) } diff --git a/src/query/sql/src/planner/plans/plan.rs b/src/query/sql/src/planner/plans/plan.rs index e555106e928ff..b124780bae533 100644 --- a/src/query/sql/src/planner/plans/plan.rs +++ b/src/query/sql/src/planner/plans/plan.rs @@ -238,11 +238,12 @@ pub enum Plan { metadata: MetadataRef, }, - CopyIntoTable { + Append { s_expr: Box<SExpr>, metadata: MetadataRef, stage_table_info: Option<Box<StageTableInfo>>, overwrite: bool, + forbid_occ_retry: bool, }, CopyIntoLocation(CopyIntoLocationPlan), @@ -418,7 +419,7 @@ impl Plan { pub fn kind(&self) -> QueryKind { match self { Plan::Query { .. } => QueryKind::Query, - Plan::CopyIntoTable { .. } => QueryKind::CopyIntoTable, + Plan::Append { .. } => QueryKind::CopyIntoTable, Plan::Explain { .. } | Plan::ExplainAnalyze { .. } | Plan::ExplainAst { ..
} diff --git a/src/query/storages/fuse/src/fuse_table.rs b/src/query/storages/fuse/src/fuse_table.rs index 8706a8f413ec8..2473b574bf6e7 100644 --- a/src/query/storages/fuse/src/fuse_table.rs +++ b/src/query/storages/fuse/src/fuse_table.rs @@ -69,7 +69,6 @@ use databend_storages_common_cache::LoadParams; use databend_storages_common_table_meta::meta::parse_storage_prefix; use databend_storages_common_table_meta::meta::ClusterKey; use databend_storages_common_table_meta::meta::CompactSegmentInfo; -use databend_storages_common_table_meta::meta::SnapshotId; use databend_storages_common_table_meta::meta::Statistics as FuseStatistics; use databend_storages_common_table_meta::meta::TableSnapshot; use databend_storages_common_table_meta::meta::TableSnapshotStatistics; @@ -729,7 +728,7 @@ impl Table for FuseTable { copied_files: Option, update_stream_meta: Vec, overwrite: bool, - prev_snapshot_id: Option, + forbid_occ_retry: bool, deduplicated_label: Option, ) -> Result<()> { self.do_commit( @@ -738,7 +737,7 @@ impl Table for FuseTable { copied_files, update_stream_meta, overwrite, - prev_snapshot_id, + forbid_occ_retry, deduplicated_label, ) } diff --git a/src/query/storages/fuse/src/operations/commit.rs b/src/query/storages/fuse/src/operations/commit.rs index 618366ad3f80c..b327162349b47 100644 --- a/src/query/storages/fuse/src/operations/commit.rs +++ b/src/query/storages/fuse/src/operations/commit.rs @@ -42,7 +42,6 @@ use databend_storages_common_cache::CacheAccessor; use databend_storages_common_cache::CachedObject; use databend_storages_common_table_meta::meta::Location; use databend_storages_common_table_meta::meta::SegmentInfo; -use databend_storages_common_table_meta::meta::SnapshotId; use databend_storages_common_table_meta::meta::Statistics; use databend_storages_common_table_meta::meta::TableSnapshot; use databend_storages_common_table_meta::meta::TableSnapshotStatistics; @@ -76,7 +75,7 @@ impl FuseTable { copied_files: Option, update_stream_meta: Vec, overwrite: bool, - prev_snapshot_id: Option, + forbid_occ_retry: bool, deduplicated_label: Option, ) -> Result<()> { let block_thresholds = self.get_block_thresholds(); @@ -110,7 +109,7 @@ impl FuseTable { snapshot_gen.clone(), input, None, - prev_snapshot_id, + forbid_occ_retry, deduplicated_label.clone(), ) })?; diff --git a/src/query/storages/fuse/src/operations/common/processors/sink_commit.rs b/src/query/storages/fuse/src/operations/common/processors/sink_commit.rs index dd1a23cad2637..fd930c4fa02dc 100644 --- a/src/query/storages/fuse/src/operations/common/processors/sink_commit.rs +++ b/src/query/storages/fuse/src/operations/common/processors/sink_commit.rs @@ -36,7 +36,6 @@ use databend_common_pipeline_core::processors::Processor; use databend_common_pipeline_core::processors::ProcessorPtr; use databend_storages_common_table_meta::meta::ClusterKey; use databend_storages_common_table_meta::meta::Location; -use databend_storages_common_table_meta::meta::SnapshotId; use databend_storages_common_table_meta::meta::TableSnapshot; use databend_storages_common_table_meta::meta::Versioned; use log::debug; @@ -92,7 +91,7 @@ pub struct CommitSink { new_segment_locs: Vec, start_time: Instant, - prev_snapshot_id: Option, + forbid_retry: bool, change_tracking: bool, update_stream_meta: Vec, @@ -111,7 +110,7 @@ where F: SnapshotGenerator + Send + 'static snapshot_gen: F, input: Arc, max_retry_elapsed: Option, - prev_snapshot_id: Option, + forbid_retry: bool, deduplicated_label: Option, ) -> Result { let purge = Self::do_purge(table, 
&snapshot_gen); @@ -130,20 +129,15 @@ where F: SnapshotGenerator + Send + 'static input, new_segment_locs: vec![], start_time: Instant::now(), - prev_snapshot_id, + forbid_retry, change_tracking: table.change_tracking_enabled(), update_stream_meta, deduplicated_label, }))) } - fn is_error_recoverable(&self, e: &ErrorCode) -> bool { - // When prev_snapshot_id is some, means it is an alter table column modification or truncate. - // In this case if commit to meta fail and error is TABLE_VERSION_MISMATCHED operation will be aborted. - if self.prev_snapshot_id.is_some() && e.code() == ErrorCode::TABLE_VERSION_MISMATCHED { - return false; - } - FuseTable::is_error_recoverable(e, self.purge) + fn can_retry(&self, e: &ErrorCode) -> bool { + !self.forbid_retry && FuseTable::is_error_recoverable(e, self.purge) } fn read_meta(&mut self) -> Result { @@ -330,28 +324,15 @@ where F: SnapshotGenerator + Send + 'static // save current table info when commit to meta server // if table_id not match, update table meta will fail let table_info = fuse_table.table_info.clone(); - // check if snapshot has been changed - let snapshot_has_changed = self.prev_snapshot_id.is_some_and(|prev_snapshot_id| { - previous - .as_ref() - .map_or(true, |previous| previous.snapshot_id != prev_snapshot_id) - }); - if snapshot_has_changed { - // if snapshot has changed abort operation - self.state = State::Abort(ErrorCode::StorageOther( - "commit failed because the snapshot had changed during the commit process", - )); - } else { - self.snapshot_gen - .fill_default_values(schema, &previous) - .await?; - - self.state = State::GenerateSnapshot { - previous, - cluster_key_meta: fuse_table.cluster_key_meta.clone(), - table_info, - }; - } + self.snapshot_gen + .fill_default_values(schema, &previous) + .await?; + + self.state = State::GenerateSnapshot { + previous, + cluster_key_meta: fuse_table.cluster_key_meta.clone(), + table_info, + }; } State::TryCommit { data, @@ -451,7 +432,7 @@ where F: SnapshotGenerator + Send + 'static info!("commit mutation success, targets {:?}", target_descriptions); self.state = State::Finish; } - Err(e) if self.is_error_recoverable(&e) => { + Err(e) if self.can_retry(&e) => { let table_info = self.table.get_table_info(); match self.backoff.next_backoff() { Some(d) => { diff --git a/src/query/storages/fuse/src/operations/truncate.rs b/src/query/storages/fuse/src/operations/truncate.rs index 6da256d0f7309..72c5aa8c5d127 100644 --- a/src/query/storages/fuse/src/operations/truncate.rs +++ b/src/query/storages/fuse/src/operations/truncate.rs @@ -20,7 +20,6 @@ use databend_common_exception::Result; use databend_common_expression::DataBlock; use databend_common_pipeline_core::Pipeline; use databend_common_pipeline_sources::OneBlockSource; -use databend_storages_common_table_meta::meta::TableSnapshot; use crate::operations::common::CommitMeta; use crate::operations::common::CommitSink; @@ -38,10 +37,7 @@ impl FuseTable { pipeline: &mut Pipeline, mode: TruncateMode, ) -> Result<()> { - if let Some(prev_snapshot) = self.read_table_snapshot().await? { - self.build_truncate_pipeline(ctx, pipeline, mode, prev_snapshot)?; - } - Ok(()) + self.build_truncate_pipeline(ctx, pipeline, mode) } #[inline] @@ -51,14 +47,9 @@ impl FuseTable { ctx: Arc, pipeline: &mut Pipeline, mode: TruncateMode, - prev_snapshot: Arc, ) -> Result<()> { // Delete operation commit can retry multi-times if table version mismatched. 
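
The hunks above trade snapshot-identity tracking for an explicit flag: instead of recording prev_snapshot_id and comparing snapshot ids at commit time to detect concurrent mutation, the caller now states up front (forbid_occ_retry / forbid_retry) whether the optimistic-concurrency commit loop may retry on TABLE_VERSION_MISMATCHED. A minimal sketch of the resulting decision logic, using simplified stand-ins rather than the real CommitSink and ErrorCode types:

    // Simplified stand-ins for the types in sink_commit.rs; illustrative only.
    #[derive(Clone, Copy, PartialEq)]
    enum Code {
        TableVersionMismatched,
        StorageNotFound,
    }

    struct Sink {
        forbid_retry: bool, // replaces prev_snapshot_id: Option<SnapshotId>
        purge: bool,
    }

    impl Sink {
        // One flag now short-circuits the whole retry decision.
        fn can_retry(&self, code: Code) -> bool {
            if self.forbid_retry {
                return false;
            }
            code == Code::TableVersionMismatched || (self.purge && code == Code::StorageNotFound)
        }
    }

    fn main() {
        // Truncate in Delete mode keeps retrying; alter-column style commits do not.
        let delete = Sink { forbid_retry: false, purge: false };
        assert!(delete.can_retry(Code::TableVersionMismatched));
        let alter = Sink { forbid_retry: true, purge: false };
        assert!(!alter.can_retry(Code::TableVersionMismatched));
    }

This also removes the mid-commit abort path: when retry is forbidden, the version-mismatch error simply propagates to the caller instead of being converted into an explicit abort state.
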
- let prev_snapshot_id = if !matches!(mode, TruncateMode::Delete) { - Some(prev_snapshot.snapshot_id) - } else { - None - }; + let forbid_occ_retry = !matches!(mode, TruncateMode::Delete); pipeline.add_source( |output| { let meta = CommitMeta { @@ -82,7 +73,7 @@ impl FuseTable { snapshot_gen.clone(), input, None, - prev_snapshot_id, + forbid_occ_retry, None, ) }) diff --git a/src/query/storages/hive/hive/src/hive_table.rs b/src/query/storages/hive/hive/src/hive_table.rs index fa728e47c2264..3577e5943927c 100644 --- a/src/query/storages/hive/hive/src/hive_table.rs +++ b/src/query/storages/hive/hive/src/hive_table.rs @@ -53,7 +53,6 @@ use databend_common_storage::DataOperator; use databend_common_storages_parquet::ParquetRSPruner; use databend_common_storages_parquet::ParquetRSReaderBuilder; use databend_storages_common_pruner::partition_prunner::PartitionPruner; -use databend_storages_common_table_meta::meta::SnapshotId; use databend_storages_common_table_meta::table::ChangeType; use futures::TryStreamExt; use log::info; @@ -447,7 +446,7 @@ impl Table for HiveTable { _copied_files: Option, _update_stream_meta: Vec, _overwrite: bool, - _prev_snapshot_id: Option, + _forbid_occ_retry: bool, _deduplicated_label: Option, ) -> Result<()> { Err(ErrorCode::Unimplemented(format!( diff --git a/src/query/storages/memory/src/memory_table.rs b/src/query/storages/memory/src/memory_table.rs index 816fd71d6b835..82bb4ef2633f3 100644 --- a/src/query/storages/memory/src/memory_table.rs +++ b/src/query/storages/memory/src/memory_table.rs @@ -49,7 +49,6 @@ use databend_common_pipeline_sources::SyncSourcer; use databend_common_storage::StorageMetrics; use databend_storages_common_blocks::memory::InMemoryDataKey; use databend_storages_common_blocks::memory::IN_MEMORY_DATA; -use databend_storages_common_table_meta::meta::SnapshotId; use databend_storages_common_table_meta::table::OPT_KEY_TEMP_PREFIX; use parking_lot::Mutex; use parking_lot::RwLock; @@ -257,7 +256,7 @@ impl Table for MemoryTable { _copied_files: Option, _update_stream_meta: Vec, overwrite: bool, - _prev_snapshot_id: Option, + _forbid_occ_retry: bool, _deduplicated_label: Option, ) -> Result<()> { pipeline.try_resize(1)?; From 7c30c8d374193ba163446c197265cb1dca154188 Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Fri, 29 Nov 2024 08:53:14 +0800 Subject: [PATCH 05/22] fix merge --- src/query/sql/src/planner/optimizer/optimizer.rs | 12 +++++++++++- .../src/operations/common/processors/sink_commit.rs | 13 ++++--------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/query/sql/src/planner/optimizer/optimizer.rs b/src/query/sql/src/planner/optimizer/optimizer.rs index 18f8e9ec39ae5..e229a291f5798 100644 --- a/src/query/sql/src/planner/optimizer/optimizer.rs +++ b/src/query/sql/src/planner/optimizer/optimizer.rs @@ -46,6 +46,7 @@ use crate::optimizer::RuleID; use crate::optimizer::SExpr; use crate::optimizer::DEFAULT_REWRITE_RULES; use crate::planner::query_executor::QueryExecutor; +use crate::plans::Append; use crate::plans::CopyIntoLocationPlan; use crate::plans::Exchange; use crate::plans::Join; @@ -294,11 +295,20 @@ pub async fn optimize(mut opt_ctx: OptimizerContext, plan: Plan) -> Result overwrite, forbid_occ_retry, } => { + let support_distributed_insert = { + let append: Append = s_expr.plan().clone().try_into()?; + let metadata = metadata.read(); + metadata + .table(append.table_index) + .table() + .support_distributed_insert() + }; let enable_distributed = opt_ctx.enable_distributed_optimization && opt_ctx 
.table_ctx .get_settings() - .get_enable_distributed_copy()?; + .get_enable_distributed_copy()? + && support_distributed_insert; info!( "after optimization enable_distributed_copy? : {}", enable_distributed diff --git a/src/query/storages/fuse/src/operations/common/processors/sink_commit.rs b/src/query/storages/fuse/src/operations/common/processors/sink_commit.rs index 1cfd053845785..b0e880aa9774d 100644 --- a/src/query/storages/fuse/src/operations/common/processors/sink_commit.rs +++ b/src/query/storages/fuse/src/operations/common/processors/sink_commit.rs @@ -137,23 +137,18 @@ where F: SnapshotGenerator + Send + 'static } fn is_error_recoverable(&self, e: &ErrorCode) -> bool { - let code = e.code(); - // When prev_snapshot_id is some, means it is an alter table column modification or truncate. - // In this case if commit to meta fail and error is TABLE_VERSION_MISMATCHED operation will be aborted. - if self.prev_snapshot_id.is_some() && code == ErrorCode::TABLE_VERSION_MISMATCHED { + if self.forbid_retry { return false; } - code == ErrorCode::TABLE_VERSION_MISMATCHED - || (self.purge && code == ErrorCode::STORAGE_NOT_FOUND) + e.code() == ErrorCode::TABLE_VERSION_MISMATCHED + || (self.purge && e.code() == ErrorCode::STORAGE_NOT_FOUND) } fn no_side_effects_in_meta_store(e: &ErrorCode) -> bool { // currently, the only error that we know, which indicates there are no side effects // is TABLE_VERSION_MISMATCHED e.code() == ErrorCode::TABLE_VERSION_MISMATCHED - fn can_retry(&self, e: &ErrorCode) -> bool { - !self.forbid_retry && FuseTable::is_error_recoverable(e, self.purge) } fn read_meta(&mut self) -> Result { @@ -448,7 +443,7 @@ where F: SnapshotGenerator + Send + 'static info!("commit mutation success, targets {:?}", target_descriptions); self.state = State::Finish; } - Err(e) if self.can_retry(&e) => { + Err(e) if self.is_error_recoverable(&e) => { let table_info = self.table.get_table_info(); match self.backoff.next_backoff() { Some(d) => { From 410fe6f31b0189d421389257e5e838157171cf1d Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Fri, 29 Nov 2024 15:45:00 +0800 Subject: [PATCH 06/22] fix schema --- .../service/src/interpreters/interpreter_append.rs | 10 +++++----- .../service/src/pipelines/builders/builder_append.rs | 2 +- src/query/service/src/pipelines/pipeline_builder.rs | 2 +- .../service/src/schedulers/fragments/fragmenter.rs | 2 +- .../service/src/schedulers/fragments/plan_fragment.rs | 2 +- src/query/sql/src/executor/physical_plan_builder.rs | 4 +--- src/query/sql/src/executor/physical_plan_visitor.rs | 4 ++-- src/query/sql/src/planner/binder/copy_into_table.rs | 5 +++-- src/query/sql/src/planner/plans/append.rs | 11 ++++++++--- 9 files changed, 23 insertions(+), 19 deletions(-) diff --git a/src/query/service/src/interpreters/interpreter_append.rs b/src/query/service/src/interpreters/interpreter_append.rs index fac02fd5b4ac0..e39e0aa63d58d 100644 --- a/src/query/service/src/interpreters/interpreter_append.rs +++ b/src/query/service/src/interpreters/interpreter_append.rs @@ -65,15 +65,15 @@ impl Interpreter for AppendInterpreter { #[fastrace::trace] #[async_backtrace::framed] async fn execute2(&self) -> Result { - debug!("ctx.id" = self.ctx.get_id().as_str(); "copy_into_table_interpreter_execute_v2"); + debug!("ctx.id" = self.ctx.get_id().as_str(); "append_interpreter_execute"); if check_deduplicate_label(self.ctx.clone()).await? 
{ return Ok(PipelineBuildResult::create()); } - let copy_into_table: Append = self.s_expr.plan().clone().try_into()?; + let append: Append = self.s_expr.plan().clone().try_into()?; let (target_table, catalog, database, table) = { let metadata = self.metadata.read(); - let t = metadata.table(copy_into_table.table_index); + let t = metadata.table(append.table_index); ( t.table(), t.catalog().to_string(), t.database().to_string(), t.name().to_string(), ) }; @@ -162,8 +162,8 @@ impl Interpreter for AppendInterpreter { } fn inject_result(&self) -> Result { - let copy_into_table: Append = self.s_expr.plan().clone().try_into()?; - match &copy_into_table.append_type { + let append: Append = self.s_expr.plan().clone().try_into()?; + match &append.append_type { AppendType::CopyInto => { let blocks = self.get_copy_into_table_result()?; Ok(Box::pin(DataBlockStream::create(None, blocks))) } diff --git a/src/query/service/src/pipelines/builders/builder_append.rs b/src/query/service/src/pipelines/builders/builder_append.rs index e8f5f12299d0c..2aa82e115a1c6 100644 --- a/src/query/service/src/pipelines/builders/builder_append.rs +++ b/src/query/service/src/pipelines/builders/builder_append.rs @@ -41,7 +41,7 @@ use crate::pipelines::PipelineBuilder; use crate::sessions::QueryContext; impl PipelineBuilder { - pub(crate) fn build_copy_into_table(&mut self, copy: &PhysicalAppend) -> Result<()> { + pub(crate) fn build_append(&mut self, copy: &PhysicalAppend) -> Result<()> { let to_table = self.ctx.build_table_by_table_info(&copy.table_info, None)?; self.ctx .set_read_block_thresholds(to_table.get_block_thresholds()); diff --git a/src/query/service/src/pipelines/pipeline_builder.rs b/src/query/service/src/pipelines/pipeline_builder.rs index 3334defe6bd02..e812f9163645c 100644 --- a/src/query/service/src/pipelines/pipeline_builder.rs +++ b/src/query/service/src/pipelines/pipeline_builder.rs @@ -195,7 +195,7 @@ impl PipelineBuilder { } // Copy into. - PhysicalPlan::Append(copy) => self.build_copy_into_table(copy), + PhysicalPlan::Append(append) => self.build_append(append), PhysicalPlan::CopyIntoLocation(copy) => self.build_copy_into_location(copy), // Replace.
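
This hunk is also where the TryFrom<RelOperator> conversion introduced alongside the RelOperator::Append rename earns its keep: the interpreter recovers the typed Append operator directly from the optimized expression via try_into. A rough, self-contained sketch of that downcast pattern, with simplified types and a String error standing in for ErrorCode:

    // Illustrative shapes only; the real Append and RelOperator carry many more fields.
    #[derive(Clone, Debug)]
    struct Append {
        table_index: usize,
    }

    #[derive(Clone, Debug)]
    enum RelOperator {
        Append(Append),
        Exchange,
    }

    impl TryFrom<RelOperator> for Append {
        type Error = String;

        fn try_from(value: RelOperator) -> Result<Self, Self::Error> {
            if let RelOperator::Append(v) = value {
                Ok(v)
            } else {
                Err(format!("Cannot downcast {:?} to Append", value))
            }
        }
    }

    fn main() -> Result<(), String> {
        let plan = RelOperator::Append(Append { table_index: 7 });
        let append: Append = plan.try_into()?;
        println!("building append pipeline for table #{}", append.table_index);
        Ok(())
    }
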
diff --git a/src/query/service/src/schedulers/fragments/fragmenter.rs b/src/query/service/src/schedulers/fragments/fragmenter.rs index 118e526ff4739..727abbc99b817 100644 --- a/src/query/service/src/schedulers/fragments/fragmenter.rs +++ b/src/query/service/src/schedulers/fragments/fragmenter.rs @@ -178,7 +178,7 @@ impl PhysicalPlanReplacer for Fragmenter { }))) } - fn replace_copy_into_table(&mut self, plan: &PhysicalAppend) -> Result { + fn replace_append(&mut self, plan: &PhysicalAppend) -> Result { let input = self.replace(&plan.input)?; Ok(PhysicalPlan::Append(Box::new(PhysicalAppend { plan_id: plan.plan_id, diff --git a/src/query/service/src/schedulers/fragments/plan_fragment.rs b/src/query/service/src/schedulers/fragments/plan_fragment.rs index d4a1b26654c14..d7cc63415c0a1 100644 --- a/src/query/service/src/schedulers/fragments/plan_fragment.rs +++ b/src/query/service/src/schedulers/fragments/plan_fragment.rs @@ -535,7 +535,7 @@ impl PhysicalPlanReplacer for ReplaceReadSource { })) } - fn replace_copy_into_table(&mut self, plan: &PhysicalAppend) -> Result { + fn replace_append(&mut self, plan: &PhysicalAppend) -> Result { let input = self.replace(&plan.input)?; Ok(PhysicalPlan::Append(Box::new(PhysicalAppend { plan_id: plan.plan_id, diff --git a/src/query/sql/src/executor/physical_plan_builder.rs b/src/query/sql/src/executor/physical_plan_builder.rs index 920ef132bd9fe..3aab6a6dab068 100644 --- a/src/query/sql/src/executor/physical_plan_builder.rs +++ b/src/query/sql/src/executor/physical_plan_builder.rs @@ -142,9 +142,7 @@ impl PhysicalPlanBuilder { } RelOperator::Recluster(recluster) => self.build_recluster(recluster).await, RelOperator::CompactBlock(compact) => self.build_compact_block(compact).await, - RelOperator::Append(copy_into_table) => { - self.build_copy_into_table(s_expr, copy_into_table).await - } + RelOperator::Append(append) => self.build_append(s_expr, append).await, RelOperator::ValueScan(value_scan) => self.build_value_scan(value_scan).await, } } diff --git a/src/query/sql/src/executor/physical_plan_visitor.rs b/src/query/sql/src/executor/physical_plan_visitor.rs index eb41a6b3cafc8..41984077bcf9b 100644 --- a/src/query/sql/src/executor/physical_plan_visitor.rs +++ b/src/query/sql/src/executor/physical_plan_visitor.rs @@ -92,7 +92,7 @@ pub trait PhysicalPlanReplacer { PhysicalPlan::CompactSource(plan) => self.replace_compact_source(plan), PhysicalPlan::CommitSink(plan) => self.replace_commit_sink(plan), PhysicalPlan::RangeJoin(plan) => self.replace_range_join(plan), - PhysicalPlan::Append(plan) => self.replace_copy_into_table(plan), + PhysicalPlan::Append(plan) => self.replace_append(plan), PhysicalPlan::CopyIntoLocation(plan) => self.replace_copy_into_location(plan), PhysicalPlan::ReplaceAsyncSourcer(plan) => self.replace_async_sourcer(plan), PhysicalPlan::ReplaceDeduplicate(plan) => self.replace_deduplicate(plan), @@ -402,7 +402,7 @@ pub trait PhysicalPlanReplacer { })) } - fn replace_copy_into_table(&mut self, plan: &PhysicalAppend) -> Result { + fn replace_append(&mut self, plan: &PhysicalAppend) -> Result { let input = self.replace(&plan.input)?; Ok(PhysicalPlan::Append(Box::new(PhysicalAppend { diff --git a/src/query/sql/src/planner/binder/copy_into_table.rs b/src/query/sql/src/planner/binder/copy_into_table.rs index caa5c597a094b..2ee55ecb2aac5 100644 --- a/src/query/sql/src/planner/binder/copy_into_table.rs +++ b/src/query/sql/src/planner/binder/copy_into_table.rs @@ -257,7 +257,7 @@ impl<'a> Binder { async fn bind_copy_into_table_from_location( &mut self, 
bind_ctx: &BindContext, - copy_into_table_plan: Append, + mut copy_into_table_plan: Append, stage_table_info: StageTableInfo, ) -> Result { let use_query = matches!(&stage_table_info.stage_info.file_format_params, @@ -304,7 +304,7 @@ impl<'a> Binder { ) .await } else { - let (scan, _) = self + let (scan, bind_context) = self .bind_stage_table( self.ctx.clone(), bind_ctx, @@ -314,6 +314,7 @@ impl<'a> Binder { stage_table_info.files_to_copy.clone(), ) .await?; + copy_into_table_plan.project_columns = Some(bind_context.columns.clone()); let copy_into = SExpr::create_unary(Arc::new(copy_into_table_plan.into()), Arc::new(scan)); diff --git a/src/query/sql/src/planner/plans/append.rs b/src/query/sql/src/planner/plans/append.rs index 4ec397720416a..bf52deb0b17d6 100644 --- a/src/query/sql/src/planner/plans/append.rs +++ b/src/query/sql/src/planner/plans/append.rs @@ -301,14 +301,19 @@ impl Operator for Append { } impl PhysicalPlanBuilder { - pub async fn build_copy_into_table( + pub async fn build_append( &mut self, s_expr: &SExpr, plan: &crate::plans::Append, ) -> Result { let target_table = self.metadata.read().table(plan.table_index).table(); - let source = self.build(s_expr.child(0)?, Default::default()).await?; + let column_set = plan + .project_columns + .as_ref() + .map(|project_columns| project_columns.iter().map(|c| c.index).collect()) + .unwrap_or_default(); + let source = self.build(s_expr.child(0)?, column_set).await?; Ok(PhysicalPlan::Append(Box::new(PhysicalAppend { plan_id: 0, @@ -317,7 +322,7 @@ impl PhysicalPlanBuilder { values_consts: plan.values_consts.clone(), required_source_schema: plan.required_source_schema.clone(), table_info: target_table.get_table_info().clone(), - project_columns: None, + project_columns: plan.project_columns.clone(), }))) } } From 5c4dafb9dbd68ab79bf0746c848b2d4063a1e6fc Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Fri, 29 Nov 2024 16:54:16 +0800 Subject: [PATCH 07/22] make lint --- src/query/sql/src/planner/binder/copy_into_table.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/query/sql/src/planner/binder/copy_into_table.rs b/src/query/sql/src/planner/binder/copy_into_table.rs index 5a10b6f188253..fbcb4225a819b 100644 --- a/src/query/sql/src/planner/binder/copy_into_table.rs +++ b/src/query/sql/src/planner/binder/copy_into_table.rs @@ -268,15 +268,13 @@ impl<'a> Binder { ) -> Result { let use_query = matches!(&stage_table_info.stage_info.file_format_params, FileFormatParams::Parquet(fmt) if fmt.missing_field_as == NullAs::Error); + let case_sensitive = stage_table_info.copy_into_table_options.column_match_mode + == Some(ColumnMatchMode::CaseSensitive); if use_query { let mut select_list = Vec::with_capacity(copy_into_table_plan.required_source_schema.num_fields()); - let case_sensitive = plan - .stage_table_info - .copy_into_table_options - .column_match_mode - == Some(ColumnMatchMode::CaseSensitive); + for dest_field in copy_into_table_plan.required_source_schema.fields().iter() { let column = Expr::ColumnRef { span: None, @@ -331,6 +329,7 @@ impl<'a> Binder { stage_table_info.files_info.clone(), &None, stage_table_info.files_to_copy.clone(), + case_sensitive, ) .await?; copy_into_table_plan.project_columns = Some(bind_context.columns.clone()); @@ -486,6 +485,8 @@ impl<'a> Binder { alias: &Option, ) -> Result { let table_ctx = self.ctx.clone(); + let case_sensitive = stage_table_info.copy_into_table_options.column_match_mode + == Some(ColumnMatchMode::CaseSensitive); let (s_expr, mut 
from_context) = self .bind_stage_table( table_ctx, @@ -494,6 +495,7 @@ impl<'a> Binder { stage_table_info.files_info.clone(), alias, stage_table_info.files_to_copy.clone(), + case_sensitive, ) .await?; From fd89e6d6a329bca5aa8e4671ef1576088fc8e882 Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Fri, 29 Nov 2024 19:21:16 +0800 Subject: [PATCH 08/22] fix modify column --- src/query/service/src/interpreters/interpreter_table_create.rs | 1 + .../src/interpreters/interpreter_table_modify_column.rs | 1 + src/query/sql/src/planner/plans/append.rs | 3 ++- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/query/service/src/interpreters/interpreter_table_create.rs b/src/query/service/src/interpreters/interpreter_table_create.rs index 1794375405150..14dd5e25bb215 100644 --- a/src/query/service/src/interpreters/interpreter_table_create.rs +++ b/src/query/service/src/interpreters/interpreter_table_create.rs @@ -233,6 +233,7 @@ impl CreateTableInterpreter { Arc::new(self.plan.schema.clone().into()), false, self.ctx.clone(), + false, ) .await?; let mut pipeline = InterpreterFactory::get(self.ctx.clone(), &append_plan) diff --git a/src/query/service/src/interpreters/interpreter_table_modify_column.rs b/src/query/service/src/interpreters/interpreter_table_modify_column.rs index cc6ca8736ca10..62a8d87b2825d 100644 --- a/src/query/service/src/interpreters/interpreter_table_modify_column.rs +++ b/src/query/service/src/interpreters/interpreter_table_modify_column.rs @@ -360,6 +360,7 @@ impl ModifyTableColumnInterpreter { Arc::new(DataSchema::from(&new_schema)), true, self.ctx.clone(), + true, ) .await?; InterpreterFactory::get(self.ctx.clone(), &append_plan) diff --git a/src/query/sql/src/planner/plans/append.rs b/src/query/sql/src/planner/plans/append.rs index bf52deb0b17d6..c1ac5a2836ba2 100644 --- a/src/query/sql/src/planner/plans/append.rs +++ b/src/query/sql/src/planner/plans/append.rs @@ -75,6 +75,7 @@ pub async fn create_append_plan_from_subquery( target_schema: DataSchemaRef, forbid_occ_retry: bool, ctx: Arc, + overwrite: bool, ) -> Result { let (project_columns, source, metadata) = match subquery { Plan::Query { @@ -115,7 +116,7 @@ pub async fn create_append_plan_from_subquery( s_expr: Box::new(s_expr), metadata: metadata.clone(), stage_table_info: None, - overwrite: false, + overwrite, forbid_occ_retry, }; let opt_ctx = OptimizerContext::new(ctx.clone(), metadata) From 2af7d260cf6592576911a31f42069a424522b724 Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Fri, 29 Nov 2024 20:23:38 +0800 Subject: [PATCH 09/22] fix schema --- .../service/src/interpreters/interpreter_append.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/query/service/src/interpreters/interpreter_append.rs b/src/query/service/src/interpreters/interpreter_append.rs index e39e0aa63d58d..d9f847a16270f 100644 --- a/src/query/service/src/interpreters/interpreter_append.rs +++ b/src/query/service/src/interpreters/interpreter_append.rs @@ -17,6 +17,7 @@ use std::sync::Arc; use databend_common_catalog::lock::LockTableOption; use databend_common_catalog::plan::StageTableInfo; use databend_common_catalog::table::TableExt; +use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::types::Int32Type; use databend_common_expression::types::StringType; @@ -83,6 +84,17 @@ impl Interpreter for AppendInterpreter { }; target_table.check_mutable()?; + if append + .project_columns + .as_ref() + .is_some_and(|p| p.len() != 
append.required_source_schema.num_fields()) + { + return Err(ErrorCode::BadArguments(format!( + "Fields in select statement is not equal with expected, select fields: {}, insert fields: {}", + append.project_columns.as_ref().unwrap().len(), + append.required_source_schema.num_fields(), + ))); + } // 1. build source and append pipeline let mut build_res = { From 2b6600b31c6483c7a6a51e24308efb9c98f3492b Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Sat, 30 Nov 2024 10:56:09 +0800 Subject: [PATCH 10/22] fix --- Cargo.lock | 1 + src/query/sql/Cargo.toml | 1 + .../sql/src/planner/binder/copy_into_table.rs | 27 +++++++++++-------- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8f190d2e76270..f6b73a79484ba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4029,6 +4029,7 @@ dependencies = [ "databend-common-settings", "databend-common-storage", "databend-common-storages-result-cache", + "databend-common-storages-stage", "databend-common-storages-view", "databend-common-users", "databend-enterprise-data-mask-feature", diff --git a/src/query/sql/Cargo.toml b/src/query/sql/Cargo.toml index 1f017a82dac1e..adfb81e625e13 100644 --- a/src/query/sql/Cargo.toml +++ b/src/query/sql/Cargo.toml @@ -32,6 +32,7 @@ databend-common-pipeline-transforms = { workspace = true } databend-common-settings = { workspace = true } databend-common-storage = { workspace = true } databend-common-storages-result-cache = { workspace = true } +databend-common-storages-stage = { workspace = true } databend-common-storages-view = { workspace = true } databend-common-users = { workspace = true } databend-enterprise-data-mask-feature = { workspace = true } diff --git a/src/query/sql/src/planner/binder/copy_into_table.rs b/src/query/sql/src/planner/binder/copy_into_table.rs index fbcb4225a819b..38e47c724786b 100644 --- a/src/query/sql/src/planner/binder/copy_into_table.rs +++ b/src/query/sql/src/planner/binder/copy_into_table.rs @@ -36,6 +36,7 @@ use databend_common_ast::ast::TypeName; use databend_common_ast::parser::parse_values_with_placeholder; use databend_common_ast::parser::tokenize_sql; use databend_common_ast::Span; +use databend_common_catalog::catalog::CATALOG_DEFAULT; use databend_common_catalog::plan::list_stage_files; use databend_common_catalog::plan::StageTableInfo; use databend_common_catalog::table_context::StageAttachment; @@ -57,6 +58,7 @@ use databend_common_meta_app::principal::NullAs; use databend_common_meta_app::principal::StageInfo; use databend_common_meta_app::principal::COPY_MAX_FILES_PER_COMMIT; use databend_common_storage::StageFilesInfo; +use databend_common_storages_stage::StageTable; use databend_common_users::UserApiProvider; use derive_visitor::Drive; use log::debug; @@ -321,17 +323,20 @@ impl<'a> Binder { ) .await } else { - let (scan, bind_context) = self - .bind_stage_table( - self.ctx.clone(), - bind_ctx, - stage_table_info.stage_info.clone(), - stage_table_info.files_info.clone(), - &None, - stage_table_info.files_to_copy.clone(), - case_sensitive, - ) - .await?; + let table = StageTable::try_create(stage_table_info.clone())?; + let table_index = self.metadata.write().add_table( + CATALOG_DEFAULT.to_string(), + "system".to_string(), + table.clone(), + None, + false, + false, + true, + false, + ); + + let (scan, bind_context) = + self.bind_base_table(bind_ctx, "system", table_index, None, &None)?; copy_into_table_plan.project_columns = Some(bind_context.columns.clone()); let copy_into = From 046685e468f488f97e29815c2ad202daf4fe82e6 Mon 
Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Sat, 30 Nov 2024 14:02:37 +0800 Subject: [PATCH 11/22] fix --- src/query/service/src/interpreters/interpreter_append.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/query/service/src/interpreters/interpreter_append.rs b/src/query/service/src/interpreters/interpreter_append.rs index d9f847a16270f..5c2b21a082b9a 100644 --- a/src/query/service/src/interpreters/interpreter_append.rs +++ b/src/query/service/src/interpreters/interpreter_append.rs @@ -28,6 +28,7 @@ use databend_common_sql::executor::physical_plans::MutationKind; use databend_common_sql::executor::PhysicalPlanBuilder; use databend_common_sql::optimizer::SExpr; use databend_common_sql::plans::AppendType; +use databend_common_sql::plans::RelOperator; use log::debug; use log::info; @@ -71,7 +72,11 @@ impl Interpreter for AppendInterpreter { return Ok(PipelineBuildResult::create()); } - let append: Append = self.s_expr.plan().clone().try_into()?; + let append: Append = match &self.s_expr.plan() { + RelOperator::Append(append) => append.clone(), + RelOperator::Exchange(_) => self.s_expr.child(0).unwrap().plan().clone().try_into()?, + _ => unreachable!(), + }; let (target_table, catalog, database, table) = { let metadata = self.metadata.read(); let t = metadata.table(append.table_index); From 53f42186a0e3e95fd1deec7d90143334a2c3f27d Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Sat, 30 Nov 2024 14:37:40 +0800 Subject: [PATCH 12/22] fix --- src/query/service/src/interpreters/interpreter_append.rs | 6 +++++- src/query/sql/src/planner/optimizer/util.rs | 1 + src/query/sql/src/planner/plans/value_scan.rs | 9 +++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/query/service/src/interpreters/interpreter_append.rs b/src/query/service/src/interpreters/interpreter_append.rs index 5c2b21a082b9a..91c9ed59dc10b 100644 --- a/src/query/service/src/interpreters/interpreter_append.rs +++ b/src/query/service/src/interpreters/interpreter_append.rs @@ -179,7 +179,11 @@ impl Interpreter for AppendInterpreter { } fn inject_result(&self) -> Result { - let append: Append = self.s_expr.plan().clone().try_into()?; + let append: Append = match &self.s_expr.plan() { + RelOperator::Append(append) => append.clone(), + RelOperator::Exchange(_) => self.s_expr.child(0).unwrap().plan().clone().try_into()?, + _ => unreachable!(), + }; match &append.append_type { AppendType::CopyInto => { let blocks = self.get_copy_into_table_result()?; diff --git a/src/query/sql/src/planner/optimizer/util.rs b/src/query/sql/src/planner/optimizer/util.rs index 8120893a8cec2..657e1c8d562e4 100644 --- a/src/query/sql/src/planner/optimizer/util.rs +++ b/src/query/sql/src/planner/optimizer/util.rs @@ -27,4 +27,5 @@ pub fn contains_local_table_scan(s_expr: &SExpr, metadata: &MetadataRef) -> bool false } || matches!(s_expr.plan(), RelOperator::RecursiveCteScan { .. }) + || matches!(s_expr.plan(), RelOperator::ValueScan { .. 
}) } diff --git a/src/query/sql/src/planner/plans/value_scan.rs b/src/query/sql/src/planner/plans/value_scan.rs index 5c3f4d786a302..ea16b95b95704 100644 --- a/src/query/sql/src/planner/plans/value_scan.rs +++ b/src/query/sql/src/planner/plans/value_scan.rs @@ -19,6 +19,9 @@ use crate::executor::physical_plans::PhysicalValueScan; use crate::executor::physical_plans::Values; use crate::executor::PhysicalPlan; use crate::executor::PhysicalPlanBuilder; +use crate::optimizer::Distribution; +use crate::optimizer::PhysicalProperty; +use crate::optimizer::RelExpr; use crate::plans::Operator; use crate::plans::RelOp; @@ -42,6 +45,12 @@ impl Operator for ValueScan { fn arity(&self) -> usize { 0 } + + fn derive_physical_prop(&self, _rel_expr: &RelExpr) -> Result { + Ok(PhysicalProperty { + distribution: Distribution::Random, + }) + } } impl PhysicalPlanBuilder { From 41a175f940e5fb0ae16d81cc2178145833a1b211 Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Sat, 30 Nov 2024 16:43:30 +0800 Subject: [PATCH 13/22] update --- .../src/interpreters/interpreter_append.rs | 7 +- src/query/sql/src/planner/optimizer/mod.rs | 1 + .../sql/src/planner/optimizer/optimizer.rs | 78 ++++++++++--------- src/query/sql/src/planner/plans/append.rs | 6 +- 4 files changed, 52 insertions(+), 40 deletions(-) diff --git a/src/query/service/src/interpreters/interpreter_append.rs b/src/query/service/src/interpreters/interpreter_append.rs index 91c9ed59dc10b..bde7e5f484b35 100644 --- a/src/query/service/src/interpreters/interpreter_append.rs +++ b/src/query/service/src/interpreters/interpreter_append.rs @@ -75,7 +75,12 @@ impl Interpreter for AppendInterpreter { let append: Append = match &self.s_expr.plan() { RelOperator::Append(append) => append.clone(), RelOperator::Exchange(_) => self.s_expr.child(0).unwrap().plan().clone().try_into()?, - _ => unreachable!(), + plan => { + return Err(ErrorCode::Internal(format!( + "AppendInterpreter: unexpected plan type: {:?}", + plan + ))); + } }; let (target_table, catalog, database, table) = { let metadata = self.metadata.read(); diff --git a/src/query/sql/src/planner/optimizer/mod.rs b/src/query/sql/src/planner/optimizer/mod.rs index d60fc78016712..5d5f3c61ed03e 100644 --- a/src/query/sql/src/planner/optimizer/mod.rs +++ b/src/query/sql/src/planner/optimizer/mod.rs @@ -43,6 +43,7 @@ pub use hyper_dp::DPhpy; pub use m_expr::MExpr; pub use memo::Memo; pub use optimizer::optimize; +pub use optimizer::optimize_append; pub use optimizer::optimize_query; pub use optimizer::OptimizerContext; pub use optimizer::RecursiveOptimizer; diff --git a/src/query/sql/src/planner/optimizer/optimizer.rs b/src/query/sql/src/planner/optimizer/optimizer.rs index e229a291f5798..1b5cad5610dc3 100644 --- a/src/query/sql/src/planner/optimizer/optimizer.rs +++ b/src/query/sql/src/planner/optimizer/optimizer.rs @@ -295,44 +295,17 @@ pub async fn optimize(mut opt_ctx: OptimizerContext, plan: Plan) -> Result overwrite, forbid_occ_retry, } => { - let support_distributed_insert = { - let append: Append = s_expr.plan().clone().try_into()?; - let metadata = metadata.read(); - metadata - .table(append.table_index) - .table() - .support_distributed_insert() - }; - let enable_distributed = opt_ctx.enable_distributed_optimization - && opt_ctx - .table_ctx - .get_settings() - .get_enable_distributed_copy()? - && support_distributed_insert; - info!( - "after optimization enable_distributed_copy? 
: {}", - enable_distributed - ); - let mut optimized_source = - optimize_query(&mut opt_ctx, s_expr.child(0)?.clone()).await?; - let optimized = match enable_distributed { - true => { - if let RelOperator::Exchange(Exchange::Merge) = optimized_source.plan.as_ref() { - optimized_source = optimized_source.child(0).unwrap().clone(); - } - let copy_into = SExpr::create_unary( - Arc::new(s_expr.plan().clone()), - Arc::new(optimized_source), - ); - let exchange = Arc::new(RelOperator::Exchange(Exchange::Merge)); - SExpr::create_unary(exchange, Arc::new(copy_into)) - } - false => { - SExpr::create_unary(Arc::new(s_expr.plan().clone()), Arc::new(optimized_source)) - } - }; + let append: Append = s_expr.plan().clone().try_into()?; + let source = s_expr.child(0)?.clone(); + let optimized_source = optimize_query(&mut opt_ctx, source).await?; + let optimized_append = optimize_append( + append, + optimized_source, + metadata.clone(), + opt_ctx.table_ctx.as_ref(), + )?; Ok(Plan::Append { - s_expr: Box::new(optimized), + s_expr: Box::new(optimized_append), metadata, stage_table_info, overwrite, @@ -610,3 +583,34 @@ async fn optimize_mutation(mut opt_ctx: OptimizerContext, s_expr: SExpr) -> Resu metadata: opt_ctx.metadata.clone(), }) } + +pub fn optimize_append( + append: Append, + source: SExpr, + metadata: MetadataRef, + table_ctx: &dyn TableContext, +) -> Result { + let support_distributed_insert = { + let metadata = metadata.read(); + metadata + .table(append.table_index) + .table() + .support_distributed_insert() + }; + let enable_distributed = table_ctx.get_settings().get_enable_distributed_copy()? + && support_distributed_insert + && matches!(source.plan(), RelOperator::Exchange(Exchange::Merge)); + info!( + "after optimization enable_distributed_copy? : {}", + enable_distributed + ); + match enable_distributed { + true => { + let source = source.child(0).unwrap().clone(); + let copy_into = SExpr::create_unary(Arc::new(append.into()), Arc::new(source)); + let exchange = Arc::new(RelOperator::Exchange(Exchange::Merge)); + Ok(SExpr::create_unary(exchange, Arc::new(copy_into))) + } + false => Ok(SExpr::create_unary(Arc::new(append.into()), Arc::new(source))), + } +} diff --git a/src/query/sql/src/planner/plans/append.rs b/src/query/sql/src/planner/plans/append.rs index c1ac5a2836ba2..9986db4999b74 100644 --- a/src/query/sql/src/planner/plans/append.rs +++ b/src/query/sql/src/planner/plans/append.rs @@ -44,6 +44,7 @@ use crate::executor::physical_plans::PhysicalAppend; use crate::executor::PhysicalPlan; use crate::executor::PhysicalPlanBuilder; use crate::optimizer::optimize; +use crate::optimizer::optimize_append; use crate::optimizer::OptimizerContext; use crate::optimizer::SExpr; use crate::ColumnBinding; @@ -111,9 +112,10 @@ pub async fn create_append_plan_from_subquery( append_type: AppendType::Insert, }; - let s_expr = SExpr::create_unary(Arc::new(insert_plan.into()), Arc::new(source)); + let optimized_append = optimize_append(insert_plan, source, metadata.clone(), ctx.as_ref())?; + let plan = Plan::Append { - s_expr: Box::new(s_expr), + s_expr: Box::new(optimized_append), metadata: metadata.clone(), stage_table_info: None, overwrite, From e73e2d070b6f920b4677472fed51ea950f0fcc23 Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Sat, 30 Nov 2024 16:52:21 +0800 Subject: [PATCH 14/22] fix --- src/query/sql/src/planner/plans/append.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/query/sql/src/planner/plans/append.rs 
b/src/query/sql/src/planner/plans/append.rs index 9986db4999b74..76a50a57f7e68 100644 --- a/src/query/sql/src/planner/plans/append.rs +++ b/src/query/sql/src/planner/plans/append.rs @@ -114,17 +114,13 @@ pub async fn create_append_plan_from_subquery( let optimized_append = optimize_append(insert_plan, source, metadata.clone(), ctx.as_ref())?; - let plan = Plan::Append { + Plan::Append { s_expr: Box::new(optimized_append), metadata: metadata.clone(), stage_table_info: None, overwrite, forbid_occ_retry, - }; - let opt_ctx = OptimizerContext::new(ctx.clone(), metadata) - .with_enable_distributed_optimization(!ctx.get_cluster().is_empty()); - - optimize(opt_ctx, plan).await + } } impl Hash for Append { From e586a4373f2427339eec4002e03df8f03547b285 Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Sat, 30 Nov 2024 17:02:35 +0800 Subject: [PATCH 15/22] make lint --- src/query/sql/src/planner/optimizer/optimizer.rs | 5 ++++- src/query/sql/src/planner/plans/append.rs | 6 ++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/query/sql/src/planner/optimizer/optimizer.rs b/src/query/sql/src/planner/optimizer/optimizer.rs index 1b5cad5610dc3..717fa94c49d66 100644 --- a/src/query/sql/src/planner/optimizer/optimizer.rs +++ b/src/query/sql/src/planner/optimizer/optimizer.rs @@ -611,6 +611,9 @@ pub fn optimize_append( let exchange = Arc::new(RelOperator::Exchange(Exchange::Merge)); Ok(SExpr::create_unary(exchange, Arc::new(copy_into))) } - false => Ok(SExpr::create_unary(Arc::new(append.into()), Arc::new(source))), + false => Ok(SExpr::create_unary( + Arc::new(append.into()), + Arc::new(source), + )), } } diff --git a/src/query/sql/src/planner/plans/append.rs b/src/query/sql/src/planner/plans/append.rs index 76a50a57f7e68..2676f47e0922a 100644 --- a/src/query/sql/src/planner/plans/append.rs +++ b/src/query/sql/src/planner/plans/append.rs @@ -43,9 +43,7 @@ use super::RelOp; use crate::executor::physical_plans::PhysicalAppend; use crate::executor::PhysicalPlan; use crate::executor::PhysicalPlanBuilder; -use crate::optimizer::optimize; use crate::optimizer::optimize_append; -use crate::optimizer::OptimizerContext; use crate::optimizer::SExpr; use crate::ColumnBinding; use crate::IndexType; @@ -114,13 +112,13 @@ pub async fn create_append_plan_from_subquery( let optimized_append = optimize_append(insert_plan, source, metadata.clone(), ctx.as_ref())?; - Plan::Append { + Ok(Plan::Append { s_expr: Box::new(optimized_append), metadata: metadata.clone(), stage_table_info: None, overwrite, forbid_occ_retry, - } + }) } impl Hash for Append { From 286fb36ce52c226f2d4ac93fa4868ba1a6093542 Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Mon, 2 Dec 2024 11:14:57 +0800 Subject: [PATCH 16/22] fix output schema --- .../interpreters/access/privilege_access.rs | 39 ++++++-------- .../src/interpreters/interpreter_append.rs | 52 +++++-------------- .../src/interpreters/interpreter_factory.rs | 8 ++- .../sql/src/planner/binder/copy_into_table.rs | 10 +++- src/query/sql/src/planner/binder/insert.rs | 3 +- .../sql/src/planner/optimizer/optimizer.rs | 4 ++ src/query/sql/src/planner/plans/append.rs | 21 ++++++-- src/query/sql/src/planner/plans/plan.rs | 7 ++- 8 files changed, 72 insertions(+), 72 deletions(-) diff --git a/src/query/service/src/interpreters/access/privilege_access.rs b/src/query/service/src/interpreters/access/privilege_access.rs index b4a1c522e1388..7e62f0d2d3d6d 100644 --- a/src/query/service/src/interpreters/access/privilege_access.rs +++ 
b/src/query/service/src/interpreters/access/privilege_access.rs @@ -40,6 +40,7 @@ use databend_common_sql::plans::OptimizeCompactBlock; use databend_common_sql::plans::PresignAction; use databend_common_sql::plans::Recluster; use databend_common_sql::plans::RewriteKind; +use databend_common_sql::BindContext; use databend_common_sql::Planner; use databend_common_users::RoleCacheManager; use databend_common_users::UserApiProvider; @@ -1002,18 +1003,22 @@ impl AccessChecker for PrivilegeAccess { self.validate_access(&GrantObject::Global, UserPrivilegeType::Super, false, false) .await?; } - // Others. - // Plan::Insert(plan) => { - // let target_table_privileges = if plan.overwrite { - // vec![UserPrivilegeType::Insert, UserPrivilegeType::Delete] - // } else { - // vec![UserPrivilegeType::Insert] - // }; - // for privilege in target_table_privileges { - // self.validate_table_access(&plan.catalog, &plan.database, &plan.table, privilege, false, false).await?; - // } - // self.validate_insert_source(ctx, &plan.source).await?; - // } + Plan::Append { s_expr, target_table_index,metadata,overwrite,.. } => { + let target_table_privileges = if *overwrite { + vec![UserPrivilegeType::Insert, UserPrivilegeType::Delete] + } else { + vec![UserPrivilegeType::Insert] + }; + let (catalog, database, table) = { + let metadata_guard = metadata.read(); + let table_entry = metadata_guard.table(*target_table_index); + (table_entry.catalog().to_string(), table_entry.database().to_string(), table_entry.name().to_string()) + }; + for privilege in target_table_privileges { + self.validate_table_access(&catalog, &database, &table, privilege, false, false).await?; + } + self.check(ctx, &Plan::Query { s_expr:s_expr.clone(), metadata: metadata.clone(), bind_context: Box::new(BindContext::new()), rewrite_kind: None, formatted_ast: None, ignore_result: false }).await?; + } Plan::InsertMultiTable(plan) => { let target_table_privileges = if plan.overwrite { vec![UserPrivilegeType::Insert, UserPrivilegeType::Delete] @@ -1164,16 +1169,6 @@ impl AccessChecker for PrivilegeAccess { self.validate_access(&GrantObject::Global, UserPrivilegeType::Alter, false, false) .await?; } - Plan::Append { .. 
} => { - // match &plan.source{ - - // } - // self.validate_stage_access(&plan.stage_table_info.stage_info, UserPrivilegeType::Read).await?; - // self.validate_table_access(&plan.catalog_name, &plan.database_name, &plan.table_name, UserPrivilegeType::Insert, false, false).await?; - // if let Some(query) = &plan.query { - // self.check(ctx, query).await?; - // } - } Plan::CopyIntoLocation(plan) => { self.validate_stage_access(&plan.stage, UserPrivilegeType::Write).await?; let from = plan.from.clone(); diff --git a/src/query/service/src/interpreters/interpreter_append.rs b/src/query/service/src/interpreters/interpreter_append.rs index bde7e5f484b35..588706265f75e 100644 --- a/src/query/service/src/interpreters/interpreter_append.rs +++ b/src/query/service/src/interpreters/interpreter_append.rs @@ -17,7 +17,6 @@ use std::sync::Arc; use databend_common_catalog::lock::LockTableOption; use databend_common_catalog::plan::StageTableInfo; use databend_common_catalog::table::TableExt; -use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::types::Int32Type; use databend_common_expression::types::StringType; @@ -28,8 +27,7 @@ use databend_common_sql::executor::physical_plans::MutationKind; use databend_common_sql::executor::PhysicalPlanBuilder; use databend_common_sql::optimizer::SExpr; use databend_common_sql::plans::AppendType; -use databend_common_sql::plans::RelOperator; -use log::debug; +use databend_common_sql::IndexType; use log::info; use crate::interpreters::common::check_deduplicate_label; @@ -41,7 +39,6 @@ use crate::pipelines::PipelineBuilder; use crate::schedulers::build_query_pipeline_without_render_result_set; use crate::sessions::QueryContext; use crate::sessions::TableContext; -use crate::sql::plans::Append; use crate::sql::MetadataRef; use crate::stream::DataBlockStream; @@ -49,9 +46,11 @@ pub struct AppendInterpreter { ctx: Arc, s_expr: SExpr, metadata: MetadataRef, + target_table_index: IndexType, stage_table_info: Option>, overwrite: bool, - col_type_modified: bool, + forbid_occ_retry: bool, + append_type: AppendType, } #[async_trait::async_trait] @@ -67,24 +66,12 @@ impl Interpreter for AppendInterpreter { #[fastrace::trace] #[async_backtrace::framed] async fn execute2(&self) -> Result { - debug!("ctx.id" = self.ctx.get_id().as_str(); "append_interpreter_execute"); if check_deduplicate_label(self.ctx.clone()).await? { return Ok(PipelineBuildResult::create()); } - - let append: Append = match &self.s_expr.plan() { - RelOperator::Append(append) => append.clone(), - RelOperator::Exchange(_) => self.s_expr.child(0).unwrap().plan().clone().try_into()?, - plan => { - return Err(ErrorCode::Internal(format!( - "AppendInterpreter: unexpected plan type: {:?}", - plan - ))); - } - }; let (target_table, catalog, database, table) = { let metadata = self.metadata.read(); - let t = metadata.table(append.table_index); + let t = metadata.table(self.target_table_index); ( t.table(), t.catalog().to_string(), @@ -92,19 +79,7 @@ impl Interpreter for AppendInterpreter { t.name().to_string(), ) }; - target_table.check_mutable()?; - if append - .project_columns - .as_ref() - .is_some_and(|p| p.len() != append.required_source_schema.num_fields()) - { - return Err(ErrorCode::BadArguments(format!( - "Fields in select statement is not equal with expected, select fields: {}, insert fields: {}", - append.project_columns.as_ref().unwrap().len(), - append.required_source_schema.num_fields(), - ))); - } // 1. 
build source and append pipeline let mut build_res = { @@ -137,7 +112,7 @@ impl Interpreter for AppendInterpreter { copied_files_meta_req, update_stream_meta, self.overwrite, - self.col_type_modified, + self.forbid_occ_retry, unsafe { self.ctx.get_settings().get_deduplicate_label()? }, )?; @@ -184,12 +159,7 @@ impl Interpreter for AppendInterpreter { } fn inject_result(&self) -> Result { - let append: Append = match &self.s_expr.plan() { - RelOperator::Append(append) => append.clone(), - RelOperator::Exchange(_) => self.s_expr.child(0).unwrap().plan().clone().try_into()?, - _ => unreachable!(), - }; - match &append.append_type { + match &self.append_type { AppendType::CopyInto => { let blocks = self.get_copy_into_table_result()?; Ok(Box::pin(DataBlockStream::create(None, blocks))) @@ -206,7 +176,9 @@ impl AppendInterpreter { metadata: MetadataRef, stage_table_info: Option>, overwrite: bool, - col_type_modified: bool, + forbid_occ_retry: bool, + append_type: AppendType, + table_index: IndexType, ) -> Result { Ok(AppendInterpreter { ctx, @@ -214,7 +186,9 @@ impl AppendInterpreter { metadata, stage_table_info, overwrite, - col_type_modified, + forbid_occ_retry, + append_type, + target_table_index: table_index, }) } diff --git a/src/query/service/src/interpreters/interpreter_factory.rs b/src/query/service/src/interpreters/interpreter_factory.rs index 08eccee820cca..e1d8fb7b0ddbe 100644 --- a/src/query/service/src/interpreters/interpreter_factory.rs +++ b/src/query/service/src/interpreters/interpreter_factory.rs @@ -161,14 +161,18 @@ impl InterpreterFactory { metadata, stage_table_info, overwrite, - forbid_occ_retry: col_type_modified, + forbid_occ_retry, + append_type, + target_table_index, } => Ok(Arc::new(AppendInterpreter::try_create( ctx, *s_expr.clone(), metadata.clone(), stage_table_info.clone(), *overwrite, - *col_type_modified, + *forbid_occ_retry, + append_type.clone(), + *target_table_index, )?)), Plan::CopyIntoLocation(copy_plan) => Ok(Arc::new( CopyIntoLocationInterpreter::try_create(ctx, copy_plan.clone())?, diff --git a/src/query/sql/src/planner/binder/copy_into_table.rs b/src/query/sql/src/planner/binder/copy_into_table.rs index 38e47c724786b..a1dffcb62aeb9 100644 --- a/src/query/sql/src/planner/binder/copy_into_table.rs +++ b/src/query/sql/src/planner/binder/copy_into_table.rs @@ -107,6 +107,7 @@ impl<'a> Binder { bind_context, copy_into_table_plan, stage_table_info, + AppendType::CopyInto, ) .await } @@ -255,7 +256,6 @@ impl<'a> Binder { required_source_schema: required_values_schema.clone(), required_values_schema: required_values_schema.clone(), project_columns: None, - append_type: AppendType::CopyInto, }; Ok((copy_into_plan, stage_table_info)) } @@ -267,6 +267,7 @@ impl<'a> Binder { bind_ctx: &BindContext, mut copy_into_table_plan: Append, stage_table_info: StageTableInfo, + append_type: AppendType, ) -> Result { let use_query = matches!(&stage_table_info.stage_info.file_format_params, FileFormatParams::Parquet(fmt) if fmt.missing_field_as == NullAs::Error); @@ -343,10 +344,12 @@ impl<'a> Binder { SExpr::create_unary(Arc::new(copy_into_table_plan.into()), Arc::new(scan)); Ok(Plan::Append { s_expr: Box::new(copy_into), + target_table_index: table_index, metadata: self.metadata.clone(), stage_table_info: Some(Box::new(stage_table_info)), overwrite: false, forbid_occ_retry: false, + append_type, }) } } @@ -467,7 +470,6 @@ impl<'a> Binder { required_values_schema, values_consts: const_columns, required_source_schema: data_schema.clone(), - append_type: AppendType::Insert, 
project_columns: None, }; @@ -475,6 +477,7 @@ impl<'a> Binder { bind_context, copy_into_table_plan, stage_table_info, + AppendType::Insert, ) .await } @@ -564,15 +567,18 @@ impl<'a> Binder { } } + let target_table_index = copy_into_table_plan.table_index; let copy_into = SExpr::create_unary(Arc::new(copy_into_table_plan.into()), Arc::new(s_expr)); Ok(Plan::Append { s_expr: Box::new(copy_into), + target_table_index, metadata: self.metadata.clone(), stage_table_info: Some(Box::new(stage_table_info)), overwrite: false, forbid_occ_retry: false, + append_type: AppendType::CopyInto, }) } diff --git a/src/query/sql/src/planner/binder/insert.rs b/src/query/sql/src/planner/binder/insert.rs index 65e85ad05df9a..c54e9aa03f75a 100644 --- a/src/query/sql/src/planner/binder/insert.rs +++ b/src/query/sql/src/planner/binder/insert.rs @@ -182,7 +182,6 @@ impl Binder { required_values_schema: schema.clone(), values_consts: vec![], required_source_schema: schema, - append_type: AppendType::Insert, project_columns, }; @@ -195,6 +194,8 @@ impl Binder { stage_table_info: None, overwrite: *overwrite, forbid_occ_retry: false, + append_type: AppendType::Insert, + target_table_index: table_index, }) } } diff --git a/src/query/sql/src/planner/optimizer/optimizer.rs b/src/query/sql/src/planner/optimizer/optimizer.rs index 717fa94c49d66..2a14cceeb9e80 100644 --- a/src/query/sql/src/planner/optimizer/optimizer.rs +++ b/src/query/sql/src/planner/optimizer/optimizer.rs @@ -294,6 +294,8 @@ pub async fn optimize(mut opt_ctx: OptimizerContext, plan: Plan) -> Result stage_table_info, overwrite, forbid_occ_retry, + append_type, + target_table_index, } => { let append: Append = s_expr.plan().clone().try_into()?; let source = s_expr.child(0)?.clone(); @@ -310,6 +312,8 @@ pub async fn optimize(mut opt_ctx: OptimizerContext, plan: Plan) -> Result stage_table_info, overwrite, forbid_occ_retry, + append_type, + target_table_index, }) } Plan::DataMutation { s_expr, .. 
} => optimize_mutation(opt_ctx, *s_expr).await, diff --git a/src/query/sql/src/planner/plans/append.rs b/src/query/sql/src/planner/plans/append.rs index 2676f47e0922a..5196a639f7246 100644 --- a/src/query/sql/src/planner/plans/append.rs +++ b/src/query/sql/src/planner/plans/append.rs @@ -57,10 +57,9 @@ pub struct Append { pub values_consts: Vec, pub required_source_schema: DataSchemaRef, pub project_columns: Option>, - pub append_type: AppendType, } -#[derive(Clone, PartialEq, Eq)] +#[derive(Clone, Debug)] pub enum AppendType { Insert, CopyInto, @@ -107,7 +106,6 @@ pub async fn create_append_plan_from_subquery( values_consts: vec![], required_source_schema: target_schema, project_columns, - append_type: AppendType::Insert, }; let optimized_append = optimize_append(insert_plan, source, metadata.clone(), ctx.as_ref())?; @@ -118,6 +116,8 @@ pub async fn create_append_plan_from_subquery( stage_table_info: None, overwrite, forbid_occ_retry, + append_type: AppendType::Insert, + target_table_index: table_index, }) } @@ -283,8 +283,8 @@ impl Append { ]) } - pub fn schema(&self) -> DataSchemaRef { - match self.append_type { + pub fn schema(append_type: &AppendType) -> DataSchemaRef { + match append_type { AppendType::CopyInto => Self::copy_into_table_schema(), AppendType::Insert => Arc::new(DataSchema::empty()), } @@ -303,6 +303,17 @@ impl PhysicalPlanBuilder { s_expr: &SExpr, plan: &crate::plans::Append, ) -> Result { + if plan + .project_columns + .as_ref() + .is_some_and(|p| p.len() != plan.required_source_schema.num_fields()) + { + return Err(ErrorCode::BadArguments(format!( + "Fields in select statement is not equal with expected, select fields: {}, insert fields: {}", + plan.project_columns.as_ref().unwrap().len(), + plan.required_source_schema.num_fields(), + ))); + } let target_table = self.metadata.read().table(plan.table_index).table(); let column_set = plan diff --git a/src/query/sql/src/planner/plans/plan.rs b/src/query/sql/src/planner/plans/plan.rs index b124780bae533..3887e13382c85 100644 --- a/src/query/sql/src/planner/plans/plan.rs +++ b/src/query/sql/src/planner/plans/plan.rs @@ -25,6 +25,8 @@ use databend_common_expression::DataSchema; use databend_common_expression::DataSchemaRef; use databend_common_expression::DataSchemaRefExt; +use super::Append; +use super::AppendType; use super::CreateDictionaryPlan; use super::DropDictionaryPlan; use super::RenameDictionaryPlan; @@ -147,6 +149,7 @@ use crate::plans::VacuumDropTablePlan; use crate::plans::VacuumTablePlan; use crate::plans::VacuumTemporaryFilesPlan; use crate::BindContext; +use crate::IndexType; use crate::MetadataRef; #[derive(Clone, Debug)] @@ -244,6 +247,8 @@ pub enum Plan { stage_table_info: Option>, overwrite: bool, forbid_occ_retry: bool, + append_type: AppendType, + target_table_index: IndexType, }, CopyIntoLocation(CopyIntoLocationPlan), @@ -476,7 +481,6 @@ impl Plan { Plan::DescNetworkPolicy(plan) => plan.schema(), Plan::ShowNetworkPolicies(plan) => plan.schema(), Plan::DescPasswordPolicy(plan) => plan.schema(), - // Plan::CopyIntoTable(plan) => plan.schema(), Plan::CopyIntoLocation(plan) => plan.schema(), Plan::CreateTask(plan) => plan.schema(), Plan::DescribeTask(plan) => plan.schema(), @@ -489,6 +493,7 @@ impl Plan { Plan::CallProcedure(plan) => plan.schema(), Plan::InsertMultiTable(plan) => plan.schema(), Plan::DescUser(plan) => plan.schema(), + Plan::Append { append_type, .. 
} => Append::schema(append_type), _ => Arc::new(DataSchema::empty()), } From ba3d3817fe04bdd85b77a178a3fd3d36f8bd0e13 Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Mon, 2 Dec 2024 13:31:06 +0800 Subject: [PATCH 17/22] fix ut --- src/query/storages/fuse/src/operations/truncate.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/query/storages/fuse/src/operations/truncate.rs b/src/query/storages/fuse/src/operations/truncate.rs index 72c5aa8c5d127..4ea432861d7d5 100644 --- a/src/query/storages/fuse/src/operations/truncate.rs +++ b/src/query/storages/fuse/src/operations/truncate.rs @@ -37,7 +37,11 @@ impl FuseTable { pipeline: &mut Pipeline, mode: TruncateMode, ) -> Result<()> { - self.build_truncate_pipeline(ctx, pipeline, mode) + if self.read_table_snapshot().await?.is_some() { + self.build_truncate_pipeline(ctx, pipeline, mode) + } else { + Ok(()) + } } #[inline] From 8e3a7fdf3170c5dc96055cd3e149da589e9bc18c Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Mon, 2 Dec 2024 14:15:14 +0800 Subject: [PATCH 18/22] fix replace into --- .../src/interpreters/interpreter_replace.rs | 80 ++++++++----------- .../builders/builder_replace_into.rs | 32 -------- .../service/src/pipelines/pipeline_builder.rs | 3 - .../transform_recursive_cte_source.rs | 1 - src/query/sql/src/executor/format.rs | 3 - src/query/sql/src/executor/physical_plan.rs | 11 --- .../sql/src/executor/physical_plan_visitor.rs | 7 -- .../07_0001_replace_with_stage.result | 48 ----------- .../07_0001_replace_with_stage.sh | 57 ------------- 9 files changed, 34 insertions(+), 208 deletions(-) delete mode 100644 tests/suites/1_stateful/07_stage_attachment/07_0001_replace_with_stage.result delete mode 100755 tests/suites/1_stateful/07_stage_attachment/07_0001_replace_with_stage.sh diff --git a/src/query/service/src/interpreters/interpreter_replace.rs b/src/query/service/src/interpreters/interpreter_replace.rs index 9d7b974932c34..34db039b5af0d 100644 --- a/src/query/service/src/interpreters/interpreter_replace.rs +++ b/src/query/service/src/interpreters/interpreter_replace.rs @@ -30,10 +30,11 @@ use databend_common_sql::executor::physical_plans::Exchange; use databend_common_sql::executor::physical_plans::FragmentKind; use databend_common_sql::executor::physical_plans::MutationKind; use databend_common_sql::executor::physical_plans::OnConflictField; -use databend_common_sql::executor::physical_plans::ReplaceAsyncSourcer; +use databend_common_sql::executor::physical_plans::PhysicalValueScan; use databend_common_sql::executor::physical_plans::ReplaceDeduplicate; use databend_common_sql::executor::physical_plans::ReplaceInto; use databend_common_sql::executor::physical_plans::ReplaceSelectCtx; +use databend_common_sql::executor::physical_plans::Values; use databend_common_sql::executor::PhysicalPlan; use databend_common_sql::plans::InsertInputSource; use databend_common_sql::plans::InsertValue; @@ -373,48 +374,28 @@ impl ReplaceInterpreter { #[async_backtrace::framed] async fn connect_input_source<'a>( &'a self, - _ctx: Arc, - _source: &'a InsertInputSource, - _schema: DataSchemaRef, + ctx: Arc, + source: &'a InsertInputSource, + schema: DataSchemaRef, _purge_info: &mut Option<(Vec, StageInfo, CopyIntoTableOptions)>, ) -> Result { - // match source { - // InsertInputSource::Values(source) => self - // .connect_value_source(schema.clone(), source) - // .map(|root| ReplaceSourceCtx { - // root, - // select_ctx: None, - // update_stream_meta: vec![], - // bind_context: None, - // }), - - // 
From 8e3a7fdf3170c5dc96055cd3e149da589e9bc18c Mon Sep 17 00:00:00 2001
From: sky <3374614481@qq.com>
Date: Mon, 2 Dec 2024 14:15:14 +0800
Subject: [PATCH 18/22] fix replace into

---
 .../src/interpreters/interpreter_replace.rs   | 80 ++++++++-----------
 .../builders/builder_replace_into.rs          | 32 --------
 .../service/src/pipelines/pipeline_builder.rs |  3 -
 .../transform_recursive_cte_source.rs         |  1 -
 src/query/sql/src/executor/format.rs          |  3 -
 src/query/sql/src/executor/physical_plan.rs   | 11 ---
 .../sql/src/executor/physical_plan_visitor.rs |  7 --
 .../07_0001_replace_with_stage.result         | 48 -----------
 .../07_0001_replace_with_stage.sh             | 57 -------------
 9 files changed, 34 insertions(+), 208 deletions(-)
 delete mode 100644 tests/suites/1_stateful/07_stage_attachment/07_0001_replace_with_stage.result
 delete mode 100755 tests/suites/1_stateful/07_stage_attachment/07_0001_replace_with_stage.sh

diff --git a/src/query/service/src/interpreters/interpreter_replace.rs b/src/query/service/src/interpreters/interpreter_replace.rs
index 9d7b974932c34..34db039b5af0d 100644
--- a/src/query/service/src/interpreters/interpreter_replace.rs
+++ b/src/query/service/src/interpreters/interpreter_replace.rs
@@ -30,10 +30,11 @@ use databend_common_sql::executor::physical_plans::Exchange;
 use databend_common_sql::executor::physical_plans::FragmentKind;
 use databend_common_sql::executor::physical_plans::MutationKind;
 use databend_common_sql::executor::physical_plans::OnConflictField;
-use databend_common_sql::executor::physical_plans::ReplaceAsyncSourcer;
+use databend_common_sql::executor::physical_plans::PhysicalValueScan;
 use databend_common_sql::executor::physical_plans::ReplaceDeduplicate;
 use databend_common_sql::executor::physical_plans::ReplaceInto;
 use databend_common_sql::executor::physical_plans::ReplaceSelectCtx;
+use databend_common_sql::executor::physical_plans::Values;
 use databend_common_sql::executor::PhysicalPlan;
 use databend_common_sql::plans::InsertInputSource;
 use databend_common_sql::plans::InsertValue;
@@ -373,48 +374,28 @@ impl ReplaceInterpreter {
     #[async_backtrace::framed]
     async fn connect_input_source<'a>(
         &'a self,
-        _ctx: Arc<QueryContext>,
-        _source: &'a InsertInputSource,
-        _schema: DataSchemaRef,
+        ctx: Arc<QueryContext>,
+        source: &'a InsertInputSource,
+        schema: DataSchemaRef,
         _purge_info: &mut Option<(Vec<StageFileInfo>, StageInfo, CopyIntoTableOptions)>,
     ) -> Result<ReplaceSourceCtx> {
-        // match source {
-        //     InsertInputSource::Values(source) => self
-        //         .connect_value_source(schema.clone(), source)
-        //         .map(|root| ReplaceSourceCtx {
-        //             root,
-        //             select_ctx: None,
-        //             update_stream_meta: vec![],
-        //             bind_context: None,
-        //         }),
-
-        //     InsertInputSource::SelectPlan(plan) => {
-        //         self.connect_query_plan_source(ctx.clone(), plan).await
-        //     }
-        //     InsertInputSource::Stage(plan) => match *plan.clone() {
-        //         Plan::CopyIntoTable(copy_plan) => {
-        //             let interpreter =
-        //                 CopyIntoTableInterpreter::try_create(ctx.clone(), *copy_plan.clone())?;
-        //             let (physical_plan, _) = interpreter.build_physical_plan(&copy_plan).await?;
-
-        //             // TODO optimization: if copy_plan.stage_table_info.files_to_copy is None, there should be a short-cut plan
-
-        //             *purge_info = Some((
-        //                 copy_plan.stage_table_info.files_to_copy.unwrap_or_default(),
-        //                 copy_plan.stage_table_info.stage_info.clone(),
-        //                 copy_plan.stage_table_info.copy_into_table_options.clone(),
-        //             ));
-        //             Ok(ReplaceSourceCtx {
-        //                 root: Box::new(physical_plan),
-        //                 select_ctx: None,
-        //                 update_stream_meta: vec![],
-        //                 bind_context: None,
-        //             })
-        //         }
-        //         _ => unreachable!("plan in InsertInputSource::Stag must be CopyIntoTable"),
-        //     },
-        // }
-        todo!()
+        match source {
+            InsertInputSource::Values(source) => self
+                .connect_value_source(schema.clone(), source)
+                .map(|root| ReplaceSourceCtx {
+                    root,
+                    select_ctx: None,
+                    update_stream_meta: vec![],
+                    bind_context: None,
+                }),
+
+            InsertInputSource::SelectPlan(plan) => {
+                self.connect_query_plan_source(ctx.clone(), plan).await
+            }
+            InsertInputSource::Stage(_) => Err(ErrorCode::StorageUnsupported(
+                "stage attachment is deprecated in replace into statement",
+            )),
+        }
     }
 
     fn connect_value_source(
@@ -422,13 +403,20 @@ impl ReplaceInterpreter {
         schema: DataSchemaRef,
         source: &InsertValue,
     ) -> Result<Box<PhysicalPlan>> {
-        Ok(Box::new(PhysicalPlan::ReplaceAsyncSourcer(
-            ReplaceAsyncSourcer {
-                schema,
+        let values = match source {
+            InsertValue::Values { rows } => Values::Values(Arc::new(rows.clone())),
+            InsertValue::RawValues { data, start } => Values::RawValues {
+                rest_str: Arc::new(data.clone()),
+                start: *start,
+            },
+        };
+        Ok(Box::new(PhysicalPlan::ValueScan(Box::new(
+            PhysicalValueScan {
                 plan_id: u32::MAX,
-                source: source.clone(),
+                values,
+                output_schema: schema.clone(),
             },
-        )))
+        ))))
     }
 
     #[async_backtrace::framed]
diff --git a/src/query/service/src/pipelines/builders/builder_replace_into.rs b/src/query/service/src/pipelines/builders/builder_replace_into.rs
index 9564103a21d0b..4dc19afd8e6b4 100644
--- a/src/query/service/src/pipelines/builders/builder_replace_into.rs
+++ b/src/query/service/src/pipelines/builders/builder_replace_into.rs
@@ -39,7 +39,6 @@ use databend_common_pipeline_transforms::processors::build_compact_block_pipeline
 use databend_common_pipeline_transforms::processors::create_dummy_item;
 use databend_common_pipeline_transforms::processors::TransformPipelineHelper;
 use databend_common_sql::executor::physical_plans::MutationKind;
-use databend_common_sql::executor::physical_plans::ReplaceAsyncSourcer;
 use databend_common_sql::executor::physical_plans::ReplaceDeduplicate;
 use databend_common_sql::executor::physical_plans::ReplaceInto;
 use databend_common_sql::executor::physical_plans::ReplaceSelectCtx;
@@ -69,36 +68,6 @@ impl PipelineBuilder {
         Ok(cast_needed)
     }
 
-    // build async sourcer pipeline.
- pub(crate) fn build_async_sourcer( - &mut self, - _async_sourcer: &ReplaceAsyncSourcer, - ) -> Result<()> { - // self.main_pipeline.add_source( - // |output| { - // let name_resolution_ctx = NameResolutionContext::try_from(self.settings.as_ref())?; - // match &async_sourcer.source { - // InsertValue::Values { rows } => { - // let inner = ValueSource::new(rows.clone(), async_sourcer.schema.clone()); - // AsyncSourcer::create(self.ctx.clone(), output, inner) - // } - // InsertValue::RawValues { data, start } => { - // let inner = RawValueSource::new( - // data.clone(), - // self.ctx.clone(), - // name_resolution_ctx, - // async_sourcer.schema.clone(), - // *start, - // ); - // AsyncSourcer::create(self.ctx.clone(), output, inner) - // } - // } - // }, - // 1, - // )?; - Ok(()) - } - // build replace into pipeline. pub(crate) fn build_replace_into(&mut self, replace: &ReplaceInto) -> Result<()> { let ReplaceInto { @@ -460,7 +429,6 @@ impl RawValueSource { schema: DataSchemaRef, start: usize, ) -> Self { - println!("schema: {:?}", schema); let bind_context = BindContext::new(); let metadata = Arc::new(RwLock::new(Metadata::default())); diff --git a/src/query/service/src/pipelines/pipeline_builder.rs b/src/query/service/src/pipelines/pipeline_builder.rs index e812f9163645c..02cd02d26f7ba 100644 --- a/src/query/service/src/pipelines/pipeline_builder.rs +++ b/src/query/service/src/pipelines/pipeline_builder.rs @@ -199,9 +199,6 @@ impl PipelineBuilder { PhysicalPlan::CopyIntoLocation(copy) => self.build_copy_into_location(copy), // Replace. - PhysicalPlan::ReplaceAsyncSourcer(async_sourcer) => { - self.build_async_sourcer(async_sourcer) - } PhysicalPlan::ReplaceDeduplicate(deduplicate) => self.build_deduplicate(deduplicate), PhysicalPlan::ReplaceInto(replace) => self.build_replace_into(replace), diff --git a/src/query/service/src/pipelines/processors/transforms/transform_recursive_cte_source.rs b/src/query/service/src/pipelines/processors/transforms/transform_recursive_cte_source.rs index 772517afac01c..d24f92c25172c 100644 --- a/src/query/service/src/pipelines/processors/transforms/transform_recursive_cte_source.rs +++ b/src/query/service/src/pipelines/processors/transforms/transform_recursive_cte_source.rs @@ -327,7 +327,6 @@ async fn create_memory_table_for_cte_scan( | PhysicalPlan::ExchangeSink(_) | PhysicalPlan::Append(_) | PhysicalPlan::CopyIntoLocation(_) - | PhysicalPlan::ReplaceAsyncSourcer(_) | PhysicalPlan::ReplaceDeduplicate(_) | PhysicalPlan::ReplaceInto(_) | PhysicalPlan::ColumnMutation(_) diff --git a/src/query/sql/src/executor/format.rs b/src/query/sql/src/executor/format.rs index 0e18bb8e9309a..e2a013c010ca8 100644 --- a/src/query/sql/src/executor/format.rs +++ b/src/query/sql/src/executor/format.rs @@ -369,9 +369,6 @@ fn to_format_tree( PhysicalPlan::RangeJoin(plan) => range_join_to_format_tree(plan, metadata, profs), PhysicalPlan::Append(plan) => format_append(plan), PhysicalPlan::CopyIntoLocation(plan) => copy_into_location(plan), - PhysicalPlan::ReplaceAsyncSourcer(_) => { - Ok(FormatTreeNode::new("ReplaceAsyncSourcer".to_string())) - } PhysicalPlan::ReplaceDeduplicate(_) => { Ok(FormatTreeNode::new("ReplaceDeduplicate".to_string())) } diff --git a/src/query/sql/src/executor/physical_plan.rs b/src/query/sql/src/executor/physical_plan.rs index ff1f69b63b2c8..c455fe9c9d37e 100644 --- a/src/query/sql/src/executor/physical_plan.rs +++ b/src/query/sql/src/executor/physical_plan.rs @@ -62,7 +62,6 @@ use crate::executor::physical_plans::ProjectSet; use 
crate::executor::physical_plans::RangeJoin; use crate::executor::physical_plans::Recluster; use crate::executor::physical_plans::RecursiveCteScan; -use crate::executor::physical_plans::ReplaceAsyncSourcer; use crate::executor::physical_plans::ReplaceDeduplicate; use crate::executor::physical_plans::ReplaceInto; use crate::executor::physical_plans::RowFetch; @@ -111,7 +110,6 @@ pub enum PhysicalPlan { CopyIntoLocation(Box), /// Replace - ReplaceAsyncSourcer(ReplaceAsyncSourcer), ReplaceDeduplicate(Box), ReplaceInto(Box), @@ -336,10 +334,6 @@ impl PhysicalPlan { *next_id += 1; plan.input.adjust_plan_id(next_id); } - PhysicalPlan::ReplaceAsyncSourcer(plan) => { - plan.plan_id = *next_id; - *next_id += 1; - } PhysicalPlan::ReplaceDeduplicate(plan) => { plan.plan_id = *next_id; *next_id += 1; @@ -439,7 +433,6 @@ impl PhysicalPlan { PhysicalPlan::CommitSink(v) => v.plan_id, PhysicalPlan::Append(v) => v.plan_id, PhysicalPlan::CopyIntoLocation(v) => v.plan_id, - PhysicalPlan::ReplaceAsyncSourcer(v) => v.plan_id, PhysicalPlan::ReplaceDeduplicate(v) => v.plan_id, PhysicalPlan::ReplaceInto(v) => v.plan_id, PhysicalPlan::CompactSource(v) => v.plan_id, @@ -495,7 +488,6 @@ impl PhysicalPlan { PhysicalPlan::AddStreamColumn(plan) => plan.output_schema(), PhysicalPlan::Mutation(_) | PhysicalPlan::ColumnMutation(_) - | PhysicalPlan::ReplaceAsyncSourcer(_) | PhysicalPlan::ReplaceDeduplicate(_) | PhysicalPlan::ReplaceInto(_) | PhysicalPlan::CompactSource(_) @@ -545,7 +537,6 @@ impl PhysicalPlan { PhysicalPlan::RangeJoin(_) => "RangeJoin".to_string(), PhysicalPlan::Append(_) => "Append".to_string(), PhysicalPlan::CopyIntoLocation(_) => "CopyIntoLocation".to_string(), - PhysicalPlan::ReplaceAsyncSourcer(_) => "ReplaceAsyncSourcer".to_string(), PhysicalPlan::ReplaceDeduplicate(_) => "ReplaceDeduplicate".to_string(), PhysicalPlan::ReplaceInto(_) => "Replace".to_string(), PhysicalPlan::MutationSource(_) => "MutationSource".to_string(), @@ -584,7 +575,6 @@ impl PhysicalPlan { | PhysicalPlan::CacheScan(_) | PhysicalPlan::ExchangeSource(_) | PhysicalPlan::CompactSource(_) - | PhysicalPlan::ReplaceAsyncSourcer(_) | PhysicalPlan::Recluster(_) | PhysicalPlan::ValueScan(_) | PhysicalPlan::RecursiveCteScan(_) => Box::new(std::iter::empty()), @@ -674,7 +664,6 @@ impl PhysicalPlan { | PhysicalPlan::CompactSource(_) | PhysicalPlan::CommitSink(_) | PhysicalPlan::Append(_) - | PhysicalPlan::ReplaceAsyncSourcer(_) | PhysicalPlan::ReplaceDeduplicate(_) | PhysicalPlan::ReplaceInto(_) | PhysicalPlan::MutationSource(_) diff --git a/src/query/sql/src/executor/physical_plan_visitor.rs b/src/query/sql/src/executor/physical_plan_visitor.rs index 41984077bcf9b..f08f6a23b53e6 100644 --- a/src/query/sql/src/executor/physical_plan_visitor.rs +++ b/src/query/sql/src/executor/physical_plan_visitor.rs @@ -55,7 +55,6 @@ use crate::executor::physical_plans::PhysicalAppend; use crate::executor::physical_plans::ProjectSet; use crate::executor::physical_plans::RangeJoin; use crate::executor::physical_plans::Recluster; -use crate::executor::physical_plans::ReplaceAsyncSourcer; use crate::executor::physical_plans::ReplaceDeduplicate; use crate::executor::physical_plans::ReplaceInto; use crate::executor::physical_plans::RowFetch; @@ -94,7 +93,6 @@ pub trait PhysicalPlanReplacer { PhysicalPlan::RangeJoin(plan) => self.replace_range_join(plan), PhysicalPlan::Append(plan) => self.replace_append(plan), PhysicalPlan::CopyIntoLocation(plan) => self.replace_copy_into_location(plan), - PhysicalPlan::ReplaceAsyncSourcer(plan) => 
self.replace_async_sourcer(plan), PhysicalPlan::ReplaceDeduplicate(plan) => self.replace_deduplicate(plan), PhysicalPlan::ReplaceInto(plan) => self.replace_replace_into(plan), PhysicalPlan::MutationSource(plan) => self.replace_mutation_source(plan), @@ -436,10 +434,6 @@ pub trait PhysicalPlanReplacer { }))) } - fn replace_async_sourcer(&mut self, plan: &ReplaceAsyncSourcer) -> Result { - Ok(PhysicalPlan::ReplaceAsyncSourcer(plan.clone())) - } - fn replace_deduplicate(&mut self, plan: &ReplaceDeduplicate) -> Result { let input = self.replace(&plan.input)?; Ok(PhysicalPlan::ReplaceDeduplicate(Box::new( @@ -637,7 +631,6 @@ impl PhysicalPlan { visit(plan); match plan { PhysicalPlan::TableScan(_) - | PhysicalPlan::ReplaceAsyncSourcer(_) | PhysicalPlan::CteScan(_) | PhysicalPlan::RecursiveCteScan(_) | PhysicalPlan::ConstantTableScan(_) diff --git a/tests/suites/1_stateful/07_stage_attachment/07_0001_replace_with_stage.result b/tests/suites/1_stateful/07_stage_attachment/07_0001_replace_with_stage.result deleted file mode 100644 index 98cb421d64e09..0000000000000 --- a/tests/suites/1_stateful/07_stage_attachment/07_0001_replace_with_stage.result +++ /dev/null @@ -1,48 +0,0 @@ -sample.csv -96 -125 -null -1 'Beijing' 100 China -2 'Shanghai' 80 China -3 'Guangzhou' 60 China -4 'Shenzhen' 70 China -5 'Shenzhen' 55 China -6 'Beijing' 99 China -96 -null -1 'Beijing' 100 China -2 'Shanghai' 80 China -3 'Guangzhou' 60 China -4 'Shenzhen' 70 China -5 'Shenzhen' 55 China -6 'Beijing' 99 China -77 -null -1 'beijing' 2 China -2 'shanghai' 2 China -3 'guangzhou' 2 China -4 'shenzhen' 2 China -5 'shenzhen' 2 China -6 'beijing' 2 China -83 -null -1 'Chengdu' 80 China -2 'shanghai' 2 China -3 'Chongqing' 90 China -4 'shenzhen' 2 China -5 'shenzhen' 2 China -6 'HangZhou' 92 China -9 'Changsha' 91 China -10 'Hong Kong‘ 88 China -{ - "code": 4000, - "message": "duplicated data detected in the values being replaced into (only the first one will be described): at row 7, [\"id\":10]" -} -1 'Chengdu' 80 China -2 'shanghai' 2 China -3 'Chongqing' 90 China -4 'shenzhen' 2 China -5 'shenzhen' 2 China -6 'HangZhou' 92 China -9 'Changsha' 91 China -10 'Hong Kong‘ 88 China diff --git a/tests/suites/1_stateful/07_stage_attachment/07_0001_replace_with_stage.sh b/tests/suites/1_stateful/07_stage_attachment/07_0001_replace_with_stage.sh deleted file mode 100755 index c40c690fff762..0000000000000 --- a/tests/suites/1_stateful/07_stage_attachment/07_0001_replace_with_stage.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -. 
"$CURDIR"/../../../shell_env.sh - -echo "drop table if exists sample_table" | $BENDSQL_CLIENT_CONNECT -echo "drop stage if exists s1" | $BENDSQL_CLIENT_CONNECT - -## Create table -cat </dev/null - -## Copy from internal stage -echo "CREATE STAGE s1 FILE_FORMAT = (TYPE = CSV)" | $BENDSQL_CLIENT_CONNECT -echo "list @s1" | $BENDSQL_CLIENT_CONNECT | awk '{print $1}' - -## Insert with stage use http API -curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "replace into sample_table (Id, City, Score) ON(Id) VALUES", "stage_attachment": {"location": "@s1/sample.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.stats.scan_progress.bytes, .stats.write_progress.bytes, .error' - -## list stage has metacache, so we just we aws client to ensure the data are purged -aws --endpoint-url ${STORAGE_S3_ENDPOINT_URL} s3 ls s3://testbucket/admin/stage/internal/s1/sample.csv - -echo "select * from sample_table order by id" | $BENDSQL_CLIENT_CONNECT - - -# use placeholder (?, ?, ?) -aws --endpoint-url ${STORAGE_S3_ENDPOINT_URL} s3 cp s3://testbucket/admin/data/csv/sample.csv s3://testbucket/admin/stage/internal/s1/sample1.csv >/dev/null -curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "replace into sample_table (Id, City, Score) ON(Id) values (?,?,?)", "stage_attachment": {"location": "@s1/sample1.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.stats.scan_progress.bytes, .error' -echo "select * from sample_table order by id" | $BENDSQL_CLIENT_CONNECT - -# use placeholder (?, ?, 1+1) -aws --endpoint-url ${STORAGE_S3_ENDPOINT_URL} s3 cp s3://testbucket/admin/data/csv/sample_2_columns.csv s3://testbucket/admin/stage/internal/s1/sample2.csv >/dev/null - -curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "replace into sample_table (Id, City, Score) ON(Id) values (?,?,1+1)", "stage_attachment": {"location": "@s1/sample2.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.stats.scan_progress.bytes, .error' -echo "select * from sample_table order by id" | $BENDSQL_CLIENT_CONNECT - -aws --endpoint-url ${STORAGE_S3_ENDPOINT_URL} s3 cp s3://testbucket/admin/data/csv/sample_3_replace.csv s3://testbucket/admin/stage/internal/s1/sample3.csv >/dev/null -curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "replace into sample_table (Id, City, Score) ON(Id) values (?,?,?)", "stage_attachment": {"location": "@s1/sample3.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.stats.scan_progress.bytes, .error' -echo "select * from sample_table order by id" | $BENDSQL_CLIENT_CONNECT - -# duplicate value would show error and would not take effect -aws --endpoint-url ${STORAGE_S3_ENDPOINT_URL} s3 cp s3://testbucket/admin/data/csv/sample_3_duplicate.csv s3://testbucket/admin/stage/internal/s1/sample4.csv >/dev/null -curl -s -u root: -XPOST "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/query" --header 'Content-Type: application/json' -d '{"sql": "replace into sample_table (Id, City, Score) ON(Id) values (?,?,?)", "stage_attachment": {"location": "@s1/sample4.csv", "copy_options": {"purge": "true"}}, "pagination": { "wait_time_secs": 3}}' | jq -r '.error' 
-echo "select * from sample_table order by id" | $BENDSQL_CLIENT_CONNECT - -### Drop table. -echo "drop table sample_table" | $BENDSQL_CLIENT_CONNECT -echo "drop stage if exists s1" | $BENDSQL_CLIENT_CONNECT From eb94eceb9170274ccb520751a2c60d7e4f9abce4 Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Mon, 2 Dec 2024 15:37:59 +0800 Subject: [PATCH 19/22] fix --- .../sql/src/planner/binder/copy_into_table.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/query/sql/src/planner/binder/copy_into_table.rs b/src/query/sql/src/planner/binder/copy_into_table.rs index a1dffcb62aeb9..d7abfb0a41d39 100644 --- a/src/query/sql/src/planner/binder/copy_into_table.rs +++ b/src/query/sql/src/planner/binder/copy_into_table.rs @@ -265,7 +265,7 @@ impl<'a> Binder { async fn bind_copy_into_table_from_location( &mut self, bind_ctx: &BindContext, - mut copy_into_table_plan: Append, + mut append_plan: Append, stage_table_info: StageTableInfo, append_type: AppendType, ) -> Result { @@ -276,9 +276,9 @@ impl<'a> Binder { if use_query { let mut select_list = - Vec::with_capacity(copy_into_table_plan.required_source_schema.num_fields()); + Vec::with_capacity(append_plan.required_source_schema.num_fields()); - for dest_field in copy_into_table_plan.required_source_schema.fields().iter() { + for dest_field in append_plan.required_source_schema.fields().iter() { let column = Expr::ColumnRef { span: None, column: ColumnRef { @@ -317,7 +317,7 @@ impl<'a> Binder { self.bind_copy_from_query_into_table( bind_ctx, - copy_into_table_plan, + append_plan, stage_table_info, &select_list, &None, @@ -338,13 +338,13 @@ impl<'a> Binder { let (scan, bind_context) = self.bind_base_table(bind_ctx, "system", table_index, None, &None)?; - copy_into_table_plan.project_columns = Some(bind_context.columns.clone()); + append_plan.project_columns = Some(bind_context.columns.clone()); + let target_table_index = append_plan.table_index; - let copy_into = - SExpr::create_unary(Arc::new(copy_into_table_plan.into()), Arc::new(scan)); + let s_expr = SExpr::create_unary(Arc::new(append_plan.into()), Arc::new(scan)); Ok(Plan::Append { - s_expr: Box::new(copy_into), - target_table_index: table_index, + s_expr: Box::new(s_expr), + target_table_index, metadata: self.metadata.clone(), stage_table_info: Some(Box::new(stage_table_info)), overwrite: false, From 83beee18d1920dde53eba1462f9860aaf6f889cf Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Mon, 2 Dec 2024 16:43:13 +0800 Subject: [PATCH 20/22] fix explain replace --- src/query/sql/src/planner/plans/insert.rs | 122 +++++++--------------- 1 file changed, 39 insertions(+), 83 deletions(-) diff --git a/src/query/sql/src/planner/plans/insert.rs b/src/query/sql/src/planner/plans/insert.rs index b04c639464623..a4ff019dc6a25 100644 --- a/src/query/sql/src/planner/plans/insert.rs +++ b/src/query/sql/src/planner/plans/insert.rs @@ -15,6 +15,7 @@ use std::sync::Arc; use databend_common_ast::ast::FormatTreeNode; +use databend_common_exception::ErrorCode; use databend_common_expression::types::StringType; use databend_common_expression::DataBlock; use databend_common_expression::DataSchemaRef; @@ -118,90 +119,45 @@ impl Insert { } pub(crate) fn format_insert_source( - _plan_name: &str, - _source: &InsertInputSource, - _verbose: bool, - _children: Vec, + plan_name: &str, + source: &InsertInputSource, + verbose: bool, + mut children: Vec, ) -> databend_common_exception::Result { - // match source { - // InsertInputSource::SelectPlan(plan) => { - // 
From 83beee18d1920dde53eba1462f9860aaf6f889cf Mon Sep 17 00:00:00 2001
From: sky <3374614481@qq.com>
Date: Mon, 2 Dec 2024 16:43:13 +0800
Subject: [PATCH 20/22] fix explain replace

---
 src/query/sql/src/planner/plans/insert.rs | 122 +++++++---------------
 1 file changed, 39 insertions(+), 83 deletions(-)

diff --git a/src/query/sql/src/planner/plans/insert.rs b/src/query/sql/src/planner/plans/insert.rs
index b04c639464623..a4ff019dc6a25 100644
--- a/src/query/sql/src/planner/plans/insert.rs
+++ b/src/query/sql/src/planner/plans/insert.rs
@@ -15,6 +15,7 @@
 use std::sync::Arc;
 
 use databend_common_ast::ast::FormatTreeNode;
+use databend_common_exception::ErrorCode;
 use databend_common_expression::types::StringType;
 use databend_common_expression::DataBlock;
 use databend_common_expression::DataSchemaRef;
@@ -118,90 +119,45 @@ impl Insert {
 }
 
 pub(crate) fn format_insert_source(
-    _plan_name: &str,
-    _source: &InsertInputSource,
-    _verbose: bool,
-    _children: Vec<FormatTreeNode<String>>,
+    plan_name: &str,
+    source: &InsertInputSource,
+    verbose: bool,
+    mut children: Vec<FormatTreeNode<String>>,
 ) -> databend_common_exception::Result<String> {
-    // match source {
-    //     InsertInputSource::SelectPlan(plan) => {
-    //         if let Plan::Query {
-    //             s_expr, metadata, ..
-    //         } = &**plan
-    //         {
-    //             let metadata = &*metadata.read();
-    //             let sub_tree = s_expr.to_format_tree(metadata, verbose)?;
-    //             children.push(sub_tree);
-
-    //             return Ok(FormatTreeNode::with_children(
-    //                 format!("{plan_name} (subquery):"),
-    //                 children,
-    //             )
-    //             .format_pretty()?);
-    //         }
-    //         Ok(String::new())
-    //     }
-    //     InsertInputSource::Values(values) => match values {
-    //         InsertValue::Values { .. } => Ok(FormatTreeNode::with_children(
-    //             format!("{plan_name} (values):"),
-    //             children,
-    //         )
-    //         .format_pretty()?),
-    //         InsertValue::RawValues { .. } => Ok(FormatTreeNode::with_children(
-    //             format!("{plan_name} (rawvalues):"),
-    //             children,
-    //         )
-    //         .format_pretty()?),
-    //     },
-    //     InsertInputSource::Stage(plan) => match *plan.clone() {
-    //         Plan::CopyIntoTable(copy_plan) => {
-    //             let CopyIntoTablePlan {
-    //                 no_file_to_copy,
-    //                 from_attachment,
-    //                 required_values_schema,
-    //                 required_source_schema,
-    //                 write_mode,
-    //                 validation_mode,
-    //                 stage_table_info,
-    //                 enable_distributed,
-    //                 ..
-    //             } = &*copy_plan;
-    //             let required_values_schema = required_values_schema
-    //                 .fields()
-    //                 .iter()
-    //                 .map(|field| field.name().to_string())
-    //                 .collect::<Vec<String>>()
-    //                 .join(",");
-    //             let required_source_schema = required_source_schema
-    //                 .fields()
-    //                 .iter()
-    //                 .map(|field| field.name().to_string())
-    //                 .collect::<Vec<String>>()
-    //                 .join(",");
-    //             let stage_node = vec![
-    //                 FormatTreeNode::new(format!("no_file_to_copy: {no_file_to_copy}")),
-    //                 FormatTreeNode::new(format!("from_attachment: {from_attachment}")),
-    //                 FormatTreeNode::new(format!(
-    //                     "required_values_schema: [{required_values_schema}]"
-    //                 )),
-    //                 FormatTreeNode::new(format!(
-    //                     "required_source_schema: [{required_source_schema}]"
-    //                 )),
-    //                 FormatTreeNode::new(format!("write_mode: {write_mode}")),
-    //                 FormatTreeNode::new(format!("validation_mode: {validation_mode}")),
-    //                 FormatTreeNode::new(format!("stage_table_info: {stage_table_info}")),
-    //                 FormatTreeNode::new(format!("enable_distributed: {enable_distributed}")),
-    //             ];
-    //             children.extend(stage_node);
-    //             Ok(
-    //                 FormatTreeNode::with_children(format!("{plan_name} (stage):"), children)
-    //                     .format_pretty()?,
-    //             )
-    //         }
-    //         _ => unreachable!("plan in InsertInputSource::Stag must be CopyIntoTable"),
-    //     },
-    // }
-    todo!()
+    match source {
+        InsertInputSource::SelectPlan(plan) => {
+            if let Plan::Query {
+                s_expr, metadata, ..
+            } = &**plan
+            {
+                let metadata = &*metadata.read();
+                let sub_tree = s_expr.to_format_tree(metadata, verbose)?;
+                children.push(sub_tree);
+
+                return Ok(FormatTreeNode::with_children(
+                    format!("{plan_name} (subquery):"),
+                    children,
+                )
+                .format_pretty()?);
+            }
+            Ok(String::new())
+        }
+        InsertInputSource::Values(values) => match values {
+            InsertValue::Values { .. } => Ok(FormatTreeNode::with_children(
+                format!("{plan_name} (values):"),
+                children,
+            )
+            .format_pretty()?),
+            InsertValue::RawValues { .. } => Ok(FormatTreeNode::with_children(
+                format!("{plan_name} (rawvalues):"),
+                children,
+            )
+            .format_pretty()?),
+        },
+        InsertInputSource::Stage(_) => Err(ErrorCode::StorageUnsupported(
+            "stage attachment is deprecated in replace into statement",
+        )),
+    }
 }
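Patch 18's `connect_value_source` and the format arms above branch over the same two shapes of a VALUES payload: already-parsed rows versus a raw, not-yet-parsed string. A condensed sketch of that mapping, with stand-in types (`i32` rows in place of the real `Scalar` values):

```rust
use std::sync::Arc;

// Stand-ins for the planner's InsertValue and the executor's Values payloads.
enum InsertValue {
    Values { rows: Vec<Vec<i32>> },           // parsed rows
    RawValues { data: String, start: usize }, // unparsed tail of the VALUES clause
}

enum Values {
    Values(Arc<Vec<Vec<i32>>>),
    RawValues { rest_str: Arc<String>, start: usize },
}

// Mirrors connect_value_source: move the payload behind an Arc so the physical
// plan node stays cheap to clone when the plan is copied across fragments.
fn to_values(source: &InsertValue) -> Values {
    match source {
        InsertValue::Values { rows } => Values::Values(Arc::new(rows.clone())),
        InsertValue::RawValues { data, start } => Values::RawValues {
            rest_str: Arc::new(data.clone()),
            start: *start,
        },
    }
}

fn main() {
    let parsed = to_values(&InsertValue::Values { rows: vec![vec![1, 2]] });
    assert!(matches!(parsed, Values::Values(_)));

    let raw = to_values(&InsertValue::RawValues {
        data: "(1,2),(3,4)".into(),
        start: 0,
    });
    match raw {
        Values::RawValues { rest_str, start } => assert_eq!((rest_str.len(), start), (11, 0)),
        _ => unreachable!(),
    }
}
```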
 
 impl std::fmt::Debug for Insert {

From bd0ca100419296eb32c37cfe6d5e874bacb50637 Mon Sep 17 00:00:00 2001
From: sky <3374614481@qq.com>
Date: Mon, 2 Dec 2024 20:22:34 +0800
Subject: [PATCH 21/22] refactor explain

---
 .../interpreters/access/privilege_access.rs   |  8 +--
 .../src/interpreters/interpreter_append.rs    | 13 +---
 .../src/interpreters/interpreter_explain.rs   | 12 +++-
 src/query/sql/src/executor/format.rs          | 60 +++++++++++++++++--
 src/query/sql/src/planner/plans/append.rs     | 15 +++++
 .../mode/standalone/explain/explain.test      | 21 ++++---
 .../standalone/explain/explain_sequence.test  | 28 +++++----
 .../mode/standalone/explain/insert.test       | 34 ++++++-----
 8 files changed, 135 insertions(+), 56 deletions(-)

diff --git a/src/query/service/src/interpreters/access/privilege_access.rs b/src/query/service/src/interpreters/access/privilege_access.rs
index 707f3f71486a7..90a7032dba64d 100644
--- a/src/query/service/src/interpreters/access/privilege_access.rs
+++ b/src/query/service/src/interpreters/access/privilege_access.rs
@@ -34,6 +34,7 @@ use databend_common_meta_app::tenant::Tenant;
 use databend_common_meta_types::seq_value::SeqV;
 use databend_common_sql::binder::MutationType;
 use databend_common_sql::optimizer::get_udf_names;
+use databend_common_sql::plans::Append;
 use databend_common_sql::plans::InsertInputSource;
 use databend_common_sql::plans::Mutation;
 use databend_common_sql::plans::OptimizeCompactBlock;
@@ -1009,11 +1010,8 @@ impl AccessChecker for PrivilegeAccess {
                 } else {
                     vec![UserPrivilegeType::Insert]
                 };
-                let (catalog, database, table) = {
-                    let metadata_guard = metadata.read();
-                    let table_entry = metadata_guard.table(*target_table_index);
-                    (table_entry.catalog().to_string(), table_entry.database().to_string(), table_entry.name().to_string())
-                };
+                let (_, catalog, database, table) =
+                    Append::target_table(metadata, *target_table_index);
                 for privilege in target_table_privileges {
                     self.validate_table_access(&catalog, &database, &table, privilege, false, false).await?;
                 }
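The access checker now shares its target-table lookup with the append interpreter below, via the `Append::target_table` helper added later in this patch. A standalone sketch of the helper's shape; the stand-ins drop the `Arc<dyn Table>` element the real function also returns:

```rust
use std::sync::{Arc, RwLock};

// Stand-ins for MetadataRef / TableEntry from databend_common_sql.
struct TableEntry {
    catalog: String,
    database: String,
    name: String,
}

struct Metadata {
    tables: Vec<TableEntry>,
}

type MetadataRef = Arc<RwLock<Metadata>>;
type IndexType = usize;

// One read-lock, one tuple out: both call sites destructure the same way.
fn target_table(metadata: &MetadataRef, table_index: IndexType) -> (String, String, String) {
    let guard = metadata.read().unwrap();
    let t = &guard.tables[table_index];
    (t.catalog.clone(), t.database.clone(), t.name.clone())
}

fn main() {
    let metadata: MetadataRef = Arc::new(RwLock::new(Metadata {
        tables: vec![TableEntry {
            catalog: "default".into(),
            database: "default".into(),
            name: "t1".into(),
        }],
    }));
    assert_eq!(target_table(&metadata, 0).2, "t1");
}
```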
diff --git a/src/query/service/src/interpreters/interpreter_append.rs b/src/query/service/src/interpreters/interpreter_append.rs
index 588706265f75e..daac037dd749e 100644
--- a/src/query/service/src/interpreters/interpreter_append.rs
+++ b/src/query/service/src/interpreters/interpreter_append.rs
@@ -26,6 +26,7 @@ use databend_common_expression::SendableDataBlockStream;
 use databend_common_sql::executor::physical_plans::MutationKind;
 use databend_common_sql::executor::PhysicalPlanBuilder;
 use databend_common_sql::optimizer::SExpr;
+use databend_common_sql::plans::Append;
 use databend_common_sql::plans::AppendType;
 use databend_common_sql::IndexType;
 use log::info;
@@ -69,16 +70,8 @@ impl Interpreter for AppendInterpreter {
         if check_deduplicate_label(self.ctx.clone()).await? {
             return Ok(PipelineBuildResult::create());
         }
-        let (target_table, catalog, database, table) = {
-            let metadata = self.metadata.read();
-            let t = metadata.table(self.target_table_index);
-            (
-                t.table(),
-                t.catalog().to_string(),
-                t.database().to_string(),
-                t.name().to_string(),
-            )
-        };
+        let (target_table, catalog, database, table) =
+            Append::target_table(&self.metadata, self.target_table_index);
         target_table.check_mutable()?;
 
         // 1. build source and append pipeline
diff --git a/src/query/service/src/interpreters/interpreter_explain.rs b/src/query/service/src/interpreters/interpreter_explain.rs
index cff227967122b..59e83833f8952 100644
--- a/src/query/service/src/interpreters/interpreter_explain.rs
+++ b/src/query/service/src/interpreters/interpreter_explain.rs
@@ -105,7 +105,6 @@ impl Interpreter for ExplainInterpreter {
                     self.explain_query(s_expr, metadata, bind_context, formatted_ast)
                         .await?
                 }
-                // Plan::Insert(insert_plan) => insert_plan.explain(self.config.verbose).await?,
                 Plan::Replace(replace_plan) => replace_plan.explain(self.config.verbose).await?,
                 Plan::CreateTable(plan) => match &plan.as_select {
                     Some(box Plan::Query {
@@ -151,6 +150,17 @@ impl Interpreter for ExplainInterpreter {
                     let plan = interpreter.build_physical_plan(&mutation, None).await?;
                     self.explain_physical_plan(&plan, metadata, &None).await?
                 }
+                Plan::Append {
+                    s_expr, metadata, ..
+                } => {
+                    let mut physical_plan_builder =
+                        PhysicalPlanBuilder::new(metadata.clone(), self.ctx.clone(), false);
+                    let physical_plan = physical_plan_builder
+                        .build(s_expr, Default::default())
+                        .await?;
+                    self.explain_physical_plan(&physical_plan, metadata, &None)
+                        .await?
+                }
                 _ => self.explain_plan(&self.plan)?,
             },
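The format.rs changes below are what turn the new physical nodes into the trees the sqllogictests expect. A toy pretty-printer showing how the `Append` and `ValueScan` arms compose children (the real `FormatTreeNode` lives in databend_common_ast; only the output shape is mirrored here):

```rust
// A toy FormatTreeNode: enough to show how with_children + format_pretty compose.
struct FormatTreeNode {
    payload: String,
    children: Vec<FormatTreeNode>,
}

impl FormatTreeNode {
    fn new(payload: impl Into<String>) -> Self {
        Self { payload: payload.into(), children: Vec::new() }
    }

    fn with_children(payload: impl Into<String>, children: Vec<FormatTreeNode>) -> Self {
        Self { payload: payload.into(), children }
    }

    fn format_pretty(&self) -> String {
        let mut out = format!("{}\n", self.payload);
        self.render_children("", &mut out);
        out
    }

    fn render_children(&self, prefix: &str, out: &mut String) {
        let last = self.children.len().saturating_sub(1);
        for (i, child) in self.children.iter().enumerate() {
            let (branch, pad) = if i == last { ("└── ", "    ") } else { ("├── ", "│   ") };
            out.push_str(&format!("{prefix}{branch}{}\n", child.payload));
            child.render_children(&format!("{prefix}{pad}"), out);
        }
    }
}

fn main() {
    // Mirrors the shape produced by append_to_format_tree + value_scan_to_format_tree.
    let tree = FormatTreeNode::with_children("Append", vec![
        FormatTreeNode::new("target table: [catalog: default] [desc: 'default'.'t1']"),
        FormatTreeNode::new("required columns: [#a, #b]"),
        FormatTreeNode::with_children("ValueScan", vec![
            FormatTreeNode::new("values: [4] rows"),
        ]),
    ]);
    print!("{}", tree.format_pretty());
}
```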
diff --git a/src/query/sql/src/executor/format.rs b/src/query/sql/src/executor/format.rs
index e2a013c010ca8..31ab72ee7ebcb 100644
--- a/src/query/sql/src/executor/format.rs
+++ b/src/query/sql/src/executor/format.rs
@@ -26,6 +26,8 @@ use databend_common_pipeline_core::processors::PlanProfile;
 use itertools::Itertools;
 
 use super::physical_plans::AddStreamColumn;
+use super::physical_plans::PhysicalValueScan;
+use super::physical_plans::Values;
 use crate::executor::explain::PlanStatsInfo;
 use crate::executor::physical_plans::AggregateExpand;
 use crate::executor::physical_plans::AggregateFinal;
@@ -367,7 +369,7 @@ fn to_format_tree(
         PhysicalPlan::ProjectSet(plan) => project_set_to_format_tree(plan, metadata, profs),
         PhysicalPlan::Udf(plan) => udf_to_format_tree(plan, metadata, profs),
         PhysicalPlan::RangeJoin(plan) => range_join_to_format_tree(plan, metadata, profs),
-        PhysicalPlan::Append(plan) => format_append(plan),
+        PhysicalPlan::Append(plan) => append_to_format_tree(plan, metadata, profs),
         PhysicalPlan::CopyIntoLocation(plan) => copy_into_location(plan),
         PhysicalPlan::ReplaceDeduplicate(_) => {
             Ok(FormatTreeNode::new("ReplaceDeduplicate".to_string()))
@@ -478,9 +480,7 @@ fn to_format_tree(
             ))
         }
         PhysicalPlan::AsyncFunction(plan) => async_function_to_format_tree(plan, metadata, profs),
-        PhysicalPlan::ValueScan(plan) => {
-            Ok(FormatTreeNode::new(format!("ValueScan: {}", plan.plan_id)))
-        }
+        PhysicalPlan::ValueScan(plan) => value_scan_to_format_tree(plan, metadata, profs),
     }
 }
@@ -686,8 +686,56 @@ fn format_add_stream_column(
     to_format_tree(&plan.input, metadata, profs)
 }
 
-fn format_append(plan: &PhysicalAppend) -> Result<FormatTreeNode<String>> {
-    Ok(FormatTreeNode::new(format!("Append: {}", plan.table_info)))
+fn append_to_format_tree(
+    plan: &PhysicalAppend,
+    metadata: &Metadata,
+    profs: &HashMap<u32, PlanProfile>,
+) -> Result<FormatTreeNode<String>> {
+    let mut children = vec![];
+    let target_table = FormatTreeNode::new(format!(
+        "target table: [catalog: {}] [desc: {}]",
+        plan.table_info.catalog_info.name_ident.catalog_name, plan.table_info.desc
+    ));
+    children.push(target_table);
+    let required_columns =
+        format_output_columns(plan.required_values_schema.clone(), metadata, false);
+    children.push(FormatTreeNode::new(format!(
+        "required columns: [{}]",
+        required_columns
+    )));
+
+    children.push(to_format_tree(&plan.input, metadata, profs)?);
+    Ok(FormatTreeNode::with_children(
+        "Append".to_string(),
+        children,
+    ))
+}
+
+fn value_scan_to_format_tree(
+    plan: &PhysicalValueScan,
+    _metadata: &Metadata,
+    _profs: &HashMap<u32, PlanProfile>,
+) -> Result<FormatTreeNode<String>> {
+    let mut children = vec![];
+    match &plan.values {
+        Values::Values(values) => {
+            children.push(FormatTreeNode::new(format!(
+                "values: [{}] rows",
+                values.len()
+            )));
+        }
+        Values::RawValues { rest_str, start } => {
+            children.push(FormatTreeNode::new(format!(
+                "raw values: string length [{}], start [{}]",
+                rest_str.len(),
+                start
+            )));
+        }
+    }
+    Ok(FormatTreeNode::with_children(
+        "ValueScan".to_string(),
+        children,
+    ))
+}
 
 fn copy_into_location(_: &CopyIntoLocation) -> Result<FormatTreeNode<String>> {
diff --git a/src/query/sql/src/planner/plans/append.rs b/src/query/sql/src/planner/plans/append.rs
index 5196a639f7246..c99ea9c285789 100644
--- a/src/query/sql/src/planner/plans/append.rs
+++ b/src/query/sql/src/planner/plans/append.rs
@@ -47,6 +47,7 @@ use crate::optimizer::optimize_append;
 use crate::optimizer::SExpr;
 use crate::ColumnBinding;
 use crate::IndexType;
+use crate::MetadataRef;
 
 #[derive(Clone, PartialEq, Eq)]
 pub struct Append {
@@ -289,6 +290,20 @@ impl Append {
             AppendType::Insert => Arc::new(DataSchema::empty()),
         }
     }
+
+    pub fn target_table(
+        metadata: &MetadataRef,
+        table_index: IndexType,
+    ) -> (Arc<dyn Table>, String, String, String) {
+        let metadata = metadata.read();
+        let t = metadata.table(table_index);
+        (
+            t.table(),
+            t.catalog().to_string(),
+            t.database().to_string(),
+            t.name().to_string(),
+        )
+    }
 }
 
 impl Operator for Append {
diff --git a/tests/sqllogictests/suites/mode/standalone/explain/explain.test b/tests/sqllogictests/suites/mode/standalone/explain/explain.test
index 940f72d817bf0..fef40733306bd 100644
--- a/tests/sqllogictests/suites/mode/standalone/explain/explain.test
+++ b/tests/sqllogictests/suites/mode/standalone/explain/explain.test
@@ -1615,15 +1615,18 @@ HashJoin
 query T
 explain insert into t2 select * from t1;
 ----
-InsertPlan (subquery):
-├── table: default.default.t2
-├── inserted columns: [t2.a (#0),t2.b (#1),t2.c (#2)]
-├── overwrite: false
-└── Scan
-    ├── table: default.t1
-    ├── filters: []
-    ├── order by: []
-    └── limit: NONE
+Append
+├── target table: [catalog: default] [desc: 'default'.'t2']
+├── required columns: [#a, #b, #c]
+└── TableScan
+    ├── table: default.default.t1
+    ├── output columns: [a (#3), b (#4), c (#5)]
+    ├── read rows: 0
+    ├── read size: 0
+    ├── partitions total: 0
+    ├── partitions scanned: 0
+    ├── push downs: [filters: [], limit: NONE]
+    └── estimated rows: 0.00
 
 
 statement ok
diff --git a/tests/sqllogictests/suites/mode/standalone/explain/explain_sequence.test b/tests/sqllogictests/suites/mode/standalone/explain/explain_sequence.test
index 839f63dfbb3d5..826aad5b10878 100644
--- a/tests/sqllogictests/suites/mode/standalone/explain/explain_sequence.test
+++ b/tests/sqllogictests/suites/mode/standalone/explain/explain_sequence.test
@@ -13,16 +13,22 @@ CREATE TABLE tmp(a int, b uint64, c int);
 query T
 explain INSERT INTO tmp select 10,nextval(seq),20 from numbers(3);
 ----
-InsertPlan (subquery):
-├── table: default.default.tmp
-├── inserted columns: [tmp.a (#0),tmp.b (#1),tmp.c (#2)]
-├── overwrite: false
+Append
+├── target table: [catalog: default] [desc: 'default'.'tmp']
+├── required columns: [#a, #b, #c]
 └── EvalScalar
-    ├── scalars: [10 AS (#1), nextval(seq) (#2) AS (#2), 20 AS (#3)]
+    ├── output columns: [nextval(seq) (#5), 10 (#4), 20 (#6)]
+    ├── expressions: [10, 20]
+    ├──
estimated rows: 3.00 └── AsyncFunction - ├── scalars: [nextval(seq) AS (#2)] - └── Scan - ├── table: system.numbers - ├── filters: [] - ├── order by: [] - └── limit: NONE + ├── output columns: [nextval(seq) (#5)] + ├── estimated rows: 3.00 + └── TableScan + ├── table: default.system.numbers + ├── output columns: [] + ├── read rows: 3 + ├── read size: < 1 KiB + ├── partitions total: 1 + ├── partitions scanned: 1 + ├── push downs: [filters: [], limit: NONE] + └── estimated rows: 3.00 diff --git a/tests/sqllogictests/suites/mode/standalone/explain/insert.test b/tests/sqllogictests/suites/mode/standalone/explain/insert.test index 58ebfcaa4b56c..b5a9c39f052ba 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/insert.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/insert.test @@ -13,25 +13,31 @@ create table t2(a int, b int); query T explain insert overwrite t1 values(1, 2), (2, 3), (3, 4), (8, 9); ---- -InsertPlan (values): -├── table: default.default.t1 -├── inserted columns: [t1.a (#0),t1.b (#1)] -└── overwrite: true +Append +├── target table: [catalog: default] [desc: 'default'.'t1'] +├── required columns: [#a, #b] +└── ValueScan + └── values: [4] rows query T explain insert into t2 select * from t1 where a=1; ---- -InsertPlan (subquery): -├── table: default.default.t2 -├── inserted columns: [t2.a (#0),t2.b (#1)] -├── overwrite: false +Append +├── target table: [catalog: default] [desc: 'default'.'t2'] +├── required columns: [#a, #b] └── Filter - ├── filters: [eq(t1.a (#0), 1)] - └── Scan - ├── table: default.t1 - ├── filters: [eq(t1.a (#0), 1)] - ├── order by: [] - └── limit: NONE + ├── output columns: [t1.a (#2), t1.b (#3)] + ├── filters: [is_true(t1.a (#2) = 1)] + ├── estimated rows: 0.00 + └── TableScan + ├── table: default.default.t1 + ├── output columns: [a (#2), b (#3)] + ├── read rows: 0 + ├── read size: 0 + ├── partitions total: 0 + ├── partitions scanned: 0 + ├── push downs: [filters: [is_true(t1.a (#2) = 1)], limit: NONE] + └── estimated rows: 0.00 From 7f2c321aa8619242f9cfcaa2654f053c57e47a32 Mon Sep 17 00:00:00 2001 From: sky <3374614481@qq.com> Date: Tue, 3 Dec 2024 11:57:20 +0800 Subject: [PATCH 22/22] fix no file to copy --- .../bind_table_reference/bind_location.rs | 1 + .../sql/src/planner/binder/copy_into_table.rs | 1 + src/query/sql/src/planner/binder/table.rs | 33 +++++++++++++++---- 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/src/query/sql/src/planner/binder/bind_table_reference/bind_location.rs b/src/query/sql/src/planner/binder/bind_table_reference/bind_location.rs index 72168a47f4589..22c8cb0d15d50 100644 --- a/src/query/sql/src/planner/binder/bind_table_reference/bind_location.rs +++ b/src/query/sql/src/planner/binder/bind_table_reference/bind_location.rs @@ -75,6 +75,7 @@ impl Binder { alias, None, options.case_sensitive.unwrap_or(false), + None, ) .await }) diff --git a/src/query/sql/src/planner/binder/copy_into_table.rs b/src/query/sql/src/planner/binder/copy_into_table.rs index d7abfb0a41d39..9245f8b46832b 100644 --- a/src/query/sql/src/planner/binder/copy_into_table.rs +++ b/src/query/sql/src/planner/binder/copy_into_table.rs @@ -504,6 +504,7 @@ impl<'a> Binder { alias, stage_table_info.files_to_copy.clone(), case_sensitive, + Some(stage_table_info.schema.clone()), ) .await?; diff --git a/src/query/sql/src/planner/binder/table.rs b/src/query/sql/src/planner/binder/table.rs index a7560c519ab58..135d7a3204804 100644 --- a/src/query/sql/src/planner/binder/table.rs +++ b/src/query/sql/src/planner/binder/table.rs @@ 
-31,6 +31,7 @@ use databend_common_ast::ast::TemporalClause;
 use databend_common_ast::ast::TimeTravelPoint;
 use databend_common_ast::Span;
 use databend_common_catalog::catalog_kind::CATALOG_DEFAULT;
+use databend_common_catalog::plan::StageTableInfo;
 use databend_common_catalog::table::NavigationPoint;
 use databend_common_catalog::table::Table;
 use databend_common_catalog::table::TimeNavigation;
@@ -45,6 +46,7 @@ use databend_common_expression::types::NumberDataType;
 use databend_common_expression::ConstantFolder;
 use databend_common_expression::DataField;
 use databend_common_expression::FunctionContext;
+use databend_common_expression::TableSchemaRef;
 use databend_common_functions::BUILTIN_FUNCTIONS;
 use databend_common_meta_app::principal::StageInfo;
 use databend_common_meta_app::schema::IndexMeta;
@@ -53,6 +55,7 @@ use databend_common_meta_app::tenant::Tenant;
 use databend_common_meta_types::MetaId;
 use databend_common_storage::StageFileInfo;
 use databend_common_storage::StageFilesInfo;
+use databend_common_storages_stage::StageTable;
 use databend_storages_common_table_meta::table::ChangeType;
 use log::info;
 use parking_lot::RwLock;
@@ -116,18 +119,34 @@ impl Binder {
         alias: &Option<TableAlias>,
         files_to_copy: Option<Vec<StageFileInfo>>,
         case_sensitive: bool,
+        inferred_schema: Option<TableSchemaRef>,
     ) -> Result<(SExpr, BindContext)> {
         let start = std::time::Instant::now();
         let max_column_position = self.metadata.read().get_max_column_position();
-        let table = table_ctx
-            .create_stage_table(
-                stage_info,
+        let table = match files_to_copy.as_ref().is_some_and(|files| files.is_empty()) {
+            true => StageTable::try_create(StageTableInfo {
+                schema: inferred_schema.unwrap(),
+                default_values: None,
                 files_info,
+                stage_info,
                 files_to_copy,
-                max_column_position,
-                case_sensitive,
-            )
-            .await?;
+                duplicated_files_detected: vec![],
+                is_select: false,
+                copy_into_location_options: Default::default(),
+                copy_into_table_options: Default::default(),
+            })?,
+            false => {
+                table_ctx
+                    .create_stage_table(
+                        stage_info,
+                        files_info,
+                        files_to_copy,
+                        max_column_position,
+                        case_sensitive,
+                    )
+                    .await?
+            }
+        };
 
         let table_alias_name = if let Some(table_alias) = alias {
             Some(normalize_identifier(&table_alias.name, &self.name_resolution_ctx).name)
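When the stage matches no files, there is nothing to sample a schema from, so `bind_stage_table` above falls back to the schema the planner already inferred. A reduced sketch of that decision with stand-in types (the real construction goes through `StageTable::try_create(StageTableInfo { .. })`):

```rust
// Stand-ins: the real StageTableInfo/StageTable live in databend_common_catalog
// and databend_common_storages_stage.
#[derive(Debug)]
struct Schema;

#[derive(Debug)]
struct StageTable {
    schema: Schema,
}

// files_to_copy == Some([]) means the stage matched nothing: build the table
// straight from the inferred schema instead of reading stage files.
fn create_stage_table(
    files_to_copy: &Option<Vec<String>>,
    inferred_schema: Option<Schema>,
    infer_from_files: impl FnOnce() -> Schema,
) -> StageTable {
    let no_files = files_to_copy.as_ref().is_some_and(|f| f.is_empty());
    let schema = if no_files {
        inferred_schema.expect("the planner supplies a schema on this path")
    } else {
        infer_from_files()
    };
    StageTable { schema }
}

fn main() {
    // Empty file list: the file-based inference closure must never run.
    let table = create_stage_table(&Some(vec![]), Some(Schema), || unreachable!());
    println!("{table:?}");
}
```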