diff --git a/Cargo.lock b/Cargo.lock index 1e8a29b614e70..9356902ebbbec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4508,6 +4508,7 @@ dependencies = [ "databend-storages-common-stage", "databend-storages-common-table-meta", "futures", + "jiff 0.2.13", "log", "opendal", "parquet", diff --git a/src/query/catalog/src/plan/datasource/datasource_info/stage.rs b/src/query/catalog/src/plan/datasource/datasource_info/stage.rs index 8510155364460..5ef30ef013e35 100644 --- a/src/query/catalog/src/plan/datasource/datasource_info/stage.rs +++ b/src/query/catalog/src/plan/datasource/datasource_info/stage.rs @@ -47,6 +47,7 @@ pub struct StageTableInfo { pub duplicated_files_detected: Vec, pub is_select: bool, pub copy_into_table_options: CopyIntoTableOptions, + pub is_variant: bool, // copy into location only pub copy_into_location_ordered: bool, diff --git a/src/query/expression/src/types/variant.rs b/src/query/expression/src/types/variant.rs index 1b722e676968c..2416db88861e6 100644 --- a/src/query/expression/src/types/variant.rs +++ b/src/query/expression/src/types/variant.rs @@ -45,6 +45,7 @@ use crate::values::Column; use crate::values::Scalar; use crate::values::ScalarRef; use crate::ColumnBuilder; +use crate::TableDataType; /// JSONB bytes representation of `null`. 
pub const JSONB_NULL: &[u8] = &[0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; @@ -215,7 +216,12 @@ impl VariantType { } } -pub fn cast_scalar_to_variant(scalar: ScalarRef, tz: &TimeZone, buf: &mut Vec) { +pub fn cast_scalar_to_variant( + scalar: ScalarRef, + tz: &TimeZone, + buf: &mut Vec, + table_data_type: Option<&TableDataType>, +) { let value = match scalar { ScalarRef::Null => jsonb::Value::Null, ScalarRef::EmptyArray => jsonb::Value::Array(vec![]), @@ -264,13 +270,24 @@ pub fn cast_scalar_to_variant(scalar: ScalarRef, tz: &TimeZone, buf: &mut Vec { - let items = cast_scalars_to_variants(col.iter(), tz); + let typ = if let Some(TableDataType::Array(typ)) = table_data_type { + Some(typ.remove_nullable()) + } else { + None + }; + let items = cast_scalars_to_variants(col.iter(), tz, typ.as_ref()); let owned_jsonb = OwnedJsonb::build_array(items.iter().map(RawJsonb::new)) .expect("failed to build jsonb array"); buf.extend_from_slice(owned_jsonb.as_ref()); return; } ScalarRef::Map(col) => { + let typ = if let Some(TableDataType::Map(typ)) = table_data_type { + Some(typ.remove_nullable()) + } else { + None + }; + let kv_col = KvPair::::try_downcast_column(&col).unwrap(); let mut kvs = BTreeMap::new(); for (k, v) in kv_col.iter() { @@ -284,7 +301,7 @@ pub fn cast_scalar_to_variant(scalar: ScalarRef, tz: &TimeZone, buf: &mut Vec unreachable!(), }; let mut val = vec![]; - cast_scalar_to_variant(v, tz, &mut val); + cast_scalar_to_variant(v, tz, &mut val, typ.as_ref()); kvs.insert(key, val); } let owned_jsonb = @@ -305,14 +322,43 @@ pub fn cast_scalar_to_variant(scalar: ScalarRef, tz: &TimeZone, buf: &mut Vec { - let values = cast_scalars_to_variants(fields, tz); - let owned_jsonb = OwnedJsonb::build_object( - values - .iter() - .enumerate() - .map(|(i, bytes)| (format!("{}", i + 1), RawJsonb::new(bytes))), - ) - .expect("failed to build jsonb object from tuple"); + let owned_jsonb = match table_data_type { + Some(TableDataType::Tuple { + fields_name, + fields_type, + 
}) => { + assert_eq!(fields.len(), fields_type.len()); + let iter = fields.into_iter(); + let mut builder = BinaryColumnBuilder::with_capacity(iter.size_hint().0, 0); + for (scalar, typ) in iter.zip(fields_type) { + cast_scalar_to_variant( + scalar, + tz, + &mut builder.data, + Some(&typ.remove_nullable()), + ); + builder.commit_row(); + } + let values = builder.build(); + OwnedJsonb::build_object( + values + .iter() + .enumerate() + .map(|(i, bytes)| (fields_name[i].clone(), RawJsonb::new(bytes))), + ) + .expect("failed to build jsonb object from tuple") + } + _ => { + let values = cast_scalars_to_variants(fields, tz, None); + OwnedJsonb::build_object( + values + .iter() + .enumerate() + .map(|(i, bytes)| (format!("{}", i + 1), RawJsonb::new(bytes))), + ) + .expect("failed to build jsonb object from tuple") + } + }; buf.extend_from_slice(owned_jsonb.as_ref()); return; } @@ -342,11 +388,12 @@ pub fn cast_scalar_to_variant(scalar: ScalarRef, tz: &TimeZone, buf: &mut Vec, tz: &TimeZone, + table_data_type: Option<&TableDataType>, ) -> BinaryColumn { let iter = scalars.into_iter(); let mut builder = BinaryColumnBuilder::with_capacity(iter.size_hint().0, 0); for scalar in iter { - cast_scalar_to_variant(scalar, tz, &mut builder.data); + cast_scalar_to_variant(scalar, tz, &mut builder.data, table_data_type); builder.commit_row(); } builder.build() diff --git a/src/query/functions/src/aggregates/aggregate_json_array_agg.rs b/src/query/functions/src/aggregates/aggregate_json_array_agg.rs index d2ebd098ffd54..e2b610ac98620 100644 --- a/src/query/functions/src/aggregates/aggregate_json_array_agg.rs +++ b/src/query/functions/src/aggregates/aggregate_json_array_agg.rs @@ -120,7 +120,7 @@ where continue; } let mut val = vec![]; - cast_scalar_to_variant(v.as_ref(), &tz, &mut val); + cast_scalar_to_variant(v.as_ref(), &tz, &mut val, None); items.push(val); } let owned_jsonb = OwnedJsonb::build_array(items.iter().map(|v| RawJsonb::new(v))) diff --git 
a/src/query/functions/src/aggregates/aggregate_json_object_agg.rs b/src/query/functions/src/aggregates/aggregate_json_object_agg.rs index 52067c8296b78..79335c02a703e 100644 --- a/src/query/functions/src/aggregates/aggregate_json_object_agg.rs +++ b/src/query/functions/src/aggregates/aggregate_json_object_agg.rs @@ -184,7 +184,7 @@ where continue; } let mut val = vec![]; - cast_scalar_to_variant(v.as_ref(), &tz, &mut val); + cast_scalar_to_variant(v.as_ref(), &tz, &mut val, None); values.push((key, val)); } let owned_jsonb = diff --git a/src/query/functions/src/scalars/variant.rs b/src/query/functions/src/scalars/variant.rs index b00cf8f10508c..fd7c45961030a 100644 --- a/src/query/functions/src/scalars/variant.rs +++ b/src/query/functions/src/scalars/variant.rs @@ -1112,7 +1112,12 @@ pub fn register(registry: &mut FunctionRegistry) { Scalar::Null => Value::Scalar(Scalar::Null), _ => { let mut buf = Vec::new(); - cast_scalar_to_variant(scalar.as_ref(), &ctx.func_ctx.tz, &mut buf); + cast_scalar_to_variant( + scalar.as_ref(), + &ctx.func_ctx.tz, + &mut buf, + None, + ); Value::Scalar(Scalar::Variant(buf)) } }, @@ -1124,7 +1129,7 @@ pub fn register(registry: &mut FunctionRegistry) { } _ => None, }; - let new_col = cast_scalars_to_variants(col.iter(), &ctx.func_ctx.tz); + let new_col = cast_scalars_to_variants(col.iter(), &ctx.func_ctx.tz, None); if let Some(validity) = validity { Value::Column(NullableColumn::new_column( Column::Variant(new_col), @@ -1157,7 +1162,7 @@ pub fn register(registry: &mut FunctionRegistry) { Scalar::Null => Value::Scalar(None), _ => { let mut buf = Vec::new(); - cast_scalar_to_variant(scalar.as_ref(), &ctx.func_ctx.tz, &mut buf); + cast_scalar_to_variant(scalar.as_ref(), &ctx.func_ctx.tz, &mut buf, None); Value::Scalar(Some(buf)) } }, @@ -1167,7 +1172,7 @@ pub fn register(registry: &mut FunctionRegistry) { Column::Nullable(box ref nullable_column) => nullable_column.validity.clone(), _ => Bitmap::new_constant(true, col.len()), }; - let 
new_col = cast_scalars_to_variants(col.iter(), &ctx.func_ctx.tz); + let new_col = cast_scalars_to_variants(col.iter(), &ctx.func_ctx.tz, None); Value::Column(NullableColumn::new(new_col, validity)) } }, @@ -2132,7 +2137,7 @@ fn json_array_fn(args: &[Value], ctx: &mut EvalContext) -> Value { // if the new value is not a json value, cast it to json. let mut new_val_buf = vec![]; - cast_scalar_to_variant(new_val.clone(), &ctx.func_ctx.tz, &mut new_val_buf); + cast_scalar_to_variant(new_val.clone(), &ctx.func_ctx.tz, &mut new_val_buf, None); let new_val = RawJsonb::new(new_val_buf.as_bytes()); value.object_insert(new_key, &new_val, update_flag) } diff --git a/src/query/service/src/interpreters/interpreter_copy_into_location.rs b/src/query/service/src/interpreters/interpreter_copy_into_location.rs index 588ac6e70772a..a91310c5437a7 100644 --- a/src/query/service/src/interpreters/interpreter_copy_into_location.rs +++ b/src/query/service/src/interpreters/interpreter_copy_into_location.rs @@ -115,6 +115,7 @@ impl CopyIntoLocationInterpreter { copy_into_table_options: Default::default(), stage_root: "".to_string(), copy_into_location_ordered: self.plan.is_ordered, + is_variant: false, }, })); diff --git a/src/query/service/src/sessions/query_ctx.rs b/src/query/service/src/sessions/query_ctx.rs index c210d218cd185..760f7d2a09531 100644 --- a/src/query/service/src/sessions/query_ctx.rs +++ b/src/query/service/src/sessions/query_ctx.rs @@ -1657,30 +1657,56 @@ impl TableContext for QueryContext { }; match stage_info.file_format_params { FileFormatParams::Parquet(..) => { - let mut read_options = ParquetReadOptions::default(); - - if !self.get_settings().get_enable_parquet_page_index()? 
{ - read_options = read_options.with_prune_pages(false); - } + if max_column_position > 1 { + Err(ErrorCode::SemanticError( + "[QUERY-CTX] Query from parquet file only support $1 as column position", + )) + } else if max_column_position == 0 { + let mut read_options = ParquetReadOptions::default(); + let settings = self.query_settings.clone(); + + if !settings.get_enable_parquet_page_index()? { + read_options = read_options.with_prune_pages(false); + } - if !self.get_settings().get_enable_parquet_rowgroup_pruning()? { - read_options = read_options.with_prune_row_groups(false); - } + if !settings.get_enable_parquet_rowgroup_pruning()? { + read_options = read_options.with_prune_row_groups(false); + } - if !self.get_settings().get_enable_parquet_prewhere()? { - read_options = read_options.with_do_prewhere(false); + if !settings.get_enable_parquet_prewhere()? { + read_options = read_options.with_do_prewhere(false); + } + ParquetTable::create( + stage_info.clone(), + files_info, + read_options, + files_to_copy, + self.get_settings(), + self.get_query_kind(), + case_sensitive, + ) + .await + } else { + let schema = Arc::new(TableSchema::new(vec![TableField::new( + "_$1", + TableDataType::Variant, + )])); + let info = StageTableInfo { + schema, + stage_info, + files_info, + files_to_copy, + duplicated_files_detected: vec![], + is_select: true, + default_exprs: None, + copy_into_location_options: Default::default(), + copy_into_table_options: Default::default(), + stage_root, + copy_into_location_ordered: false, + is_variant: true, + }; + StageTable::try_create(info) } - - ParquetTable::create( - stage_info.clone(), - files_info, - read_options, - files_to_copy, - self.get_settings(), - self.get_query_kind(), - case_sensitive, - ) - .await } FileFormatParams::Orc(..) 
=> { let schema = Arc::new(TableSchema::empty()); @@ -1696,6 +1722,7 @@ impl TableContext for QueryContext { copy_into_table_options: Default::default(), stage_root, copy_into_location_ordered: false, + is_variant: false, }; OrcTable::try_create(info).await } @@ -1716,6 +1743,7 @@ impl TableContext for QueryContext { copy_into_table_options: Default::default(), stage_root, copy_into_location_ordered: false, + is_variant: true, }; StageTable::try_create(info) } @@ -1754,6 +1782,7 @@ impl TableContext for QueryContext { copy_into_table_options: Default::default(), stage_root, copy_into_location_ordered: false, + is_variant: false, }; StageTable::try_create(info) } diff --git a/src/query/sql/src/planner/binder/copy_into_table.rs b/src/query/sql/src/planner/binder/copy_into_table.rs index 1595e5b1da253..f184387ad52ad 100644 --- a/src/query/sql/src/planner/binder/copy_into_table.rs +++ b/src/query/sql/src/planner/binder/copy_into_table.rs @@ -225,6 +225,7 @@ impl Binder { copy_into_table_options: stmt.options.clone(), stage_root: "".to_string(), copy_into_location_ordered: false, + is_variant: false, }, values_consts: vec![], required_source_schema: required_values_schema.clone(), @@ -406,6 +407,7 @@ impl Binder { copy_into_table_options: options, stage_root: "".to_string(), copy_into_location_ordered: false, + is_variant: false, }, write_mode, query: None, diff --git a/src/query/storages/parquet/Cargo.toml b/src/query/storages/parquet/Cargo.toml index c8d9f24dc12d0..631baee0af5dd 100644 --- a/src/query/storages/parquet/Cargo.toml +++ b/src/query/storages/parquet/Cargo.toml @@ -32,6 +32,7 @@ databend-storages-common-pruner = { workspace = true } databend-storages-common-stage = { workspace = true } databend-storages-common-table-meta = { workspace = true } futures = { workspace = true } +jiff = { workspace = true } log = { workspace = true } opendal = { workspace = true } parquet = { workspace = true } diff --git 
a/src/query/storages/parquet/src/copy_into_table/source.rs b/src/query/storages/parquet/src/copy_into_table/source.rs index 28517ed1284c9..94d6459992ab4 100644 --- a/src/query/storages/parquet/src/copy_into_table/source.rs +++ b/src/query/storages/parquet/src/copy_into_table/source.rs @@ -185,6 +185,7 @@ impl Processor for ParquetCopySource { } _ => unreachable!(), } + Ok(()) } } diff --git a/src/query/storages/parquet/src/lib.rs b/src/query/storages/parquet/src/lib.rs index 93d806d3c7c07..1702f66d07a9d 100644 --- a/src/query/storages/parquet/src/lib.rs +++ b/src/query/storages/parquet/src/lib.rs @@ -41,6 +41,7 @@ mod statistics; mod transformer; mod meta; +mod parquet_variant_table; mod schema; pub use copy_into_table::ParquetTableForCopy; @@ -53,6 +54,7 @@ pub use parquet_reader::ParquetFileReader; pub use parquet_reader::ParquetReaderBuilder; pub use parquet_reader::ParquetWholeFileReader; pub use parquet_table::ParquetTable; +pub use parquet_variant_table::ParquetVariantTable; // for it test pub use pruning::ParquetPruner; pub use source::ParquetSource; diff --git a/src/query/storages/parquet/src/parquet_part.rs b/src/query/storages/parquet/src/parquet_part.rs index 785ad037597aa..5482776ee0b60 100644 --- a/src/query/storages/parquet/src/parquet_part.rs +++ b/src/query/storages/parquet/src/parquet_part.rs @@ -22,6 +22,7 @@ use databend_common_catalog::plan::PartInfo; use databend_common_catalog::plan::PartInfoPtr; use databend_common_catalog::plan::PartStatistics; use databend_common_catalog::plan::Partitions; +use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; use databend_common_exception::Result; @@ -153,7 +154,7 @@ impl ParquetPart { /// 1. to fully utilize the IO, multiple small files are loaded in one part. /// 2. to avoid OOM, the total size of small files in one part is limited, /// and we need compression_ratio to estimate the uncompressed size. 
-pub(crate) fn collect_small_file_parts( +fn collect_small_file_parts( small_files: Vec<(String, u64, String)>, mut max_compression_ratio: f64, mut max_compressed_size: u64, @@ -211,7 +212,7 @@ pub(crate) fn collect_small_file_parts( } } -pub(crate) fn collect_file_parts( +fn collect_file_parts( files: Vec<(String, u64, String)>, compress_ratio: f64, partitions: &mut Partitions, @@ -248,3 +249,58 @@ pub(crate) fn collect_file_parts( stats.is_exact = false; } } + +pub(crate) fn collect_parts( + ctx: Arc, + files: Vec<(String, u64, String)>, + compression_ratio: f64, + num_columns_to_read: usize, + total_columns_to_read: usize, +) -> Result<(PartStatistics, Partitions)> { + let mut partitions = Partitions::default(); + let mut stats = PartStatistics::default(); + + let fast_read_bytes = ctx.get_settings().get_parquet_fast_read_bytes()?; + let rowgroup_hint_bytes = ctx.get_settings().get_parquet_rowgroup_hint_bytes()?; + + let mut large_files = vec![]; + let mut small_files = vec![]; + for (location, size, dedup_key) in files.into_iter() { + if size > fast_read_bytes { + large_files.push((location, size, dedup_key)); + } else if size > 0 { + small_files.push((location, size, dedup_key)); + } + } + + collect_file_parts( + large_files, + compression_ratio, + &mut partitions, + &mut stats, + num_columns_to_read, + total_columns_to_read, + rowgroup_hint_bytes, + ); + + if !small_files.is_empty() { + let mut max_compression_ratio = compression_ratio; + let mut max_compressed_size = 0u64; + for part in partitions.partitions.iter() { + let p = part.as_any().downcast_ref::().unwrap(); + max_compression_ratio = max_compression_ratio + .max(p.uncompressed_size() as f64 / p.compressed_size() as f64); + max_compressed_size = max_compressed_size.max(p.compressed_size()); + } + + collect_small_file_parts( + small_files, + max_compression_ratio, + max_compressed_size, + &mut partitions, + &mut stats, + ); + } + + Ok((stats, partitions)) +} diff --git 
a/src/query/storages/parquet/src/parquet_table/partition.rs b/src/query/storages/parquet/src/parquet_table/partition.rs index 7de3ec8acb55c..2da624092a58c 100644 --- a/src/query/storages/parquet/src/parquet_table/partition.rs +++ b/src/query/storages/parquet/src/parquet_table/partition.rs @@ -21,13 +21,34 @@ use databend_common_catalog::plan::PushDownInfo; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; +use databend_common_expression::FieldIndex; use super::table::ParquetTable; -use crate::parquet_part::collect_file_parts; -use crate::parquet_part::collect_small_file_parts; -use crate::ParquetPart; +use crate::parquet_part::collect_parts; impl ParquetTable { + fn columns_to_read(&self, push_down: Option) -> Vec { + if let Some(prewhere) = PushDownInfo::prewhere_of_push_downs(push_down.as_ref()) { + let (_, prewhere_columns) = prewhere + .prewhere_columns + .to_arrow_projection(&self.schema_descr); + let (_, output_columns) = prewhere + .output_columns + .to_arrow_projection(&self.schema_descr); + let mut columns = HashSet::with_capacity(prewhere_columns.len() + output_columns.len()); + columns.extend(prewhere_columns); + columns.extend(output_columns); + let mut columns = columns.into_iter().collect::>(); + columns.sort(); + columns + } else { + let output_projection = + PushDownInfo::projection_of_push_downs(&self.schema(), push_down.as_ref()); + let (_, columns) = output_projection.to_arrow_projection(&self.schema_descr); + columns + } + } + #[inline] #[async_backtrace::framed] pub(super) async fn do_read_partitions( @@ -35,9 +56,12 @@ impl ParquetTable { ctx: Arc, push_down: Option, ) -> Result<(PartStatistics, Partitions)> { + // It will be used to calculate the memory will be used in reading. + let num_columns_to_read = self.columns_to_read(push_down).len(); + let thread_num = ctx.get_settings().get_max_threads()? 
as usize; - let file_locations = { + let files = { match &self.files_to_read { Some(files) => files .iter() @@ -55,75 +79,12 @@ impl ParquetTable { } }; - // It will be used to calculate the memory will be used in reading. - let columns_to_read = if let Some(prewhere) = - PushDownInfo::prewhere_of_push_downs(push_down.as_ref()) - { - let (_, prewhere_columns) = prewhere - .prewhere_columns - .to_arrow_projection(&self.schema_descr); - let (_, output_columns) = prewhere - .output_columns - .to_arrow_projection(&self.schema_descr); - let mut columns = HashSet::with_capacity(prewhere_columns.len() + output_columns.len()); - columns.extend(prewhere_columns); - columns.extend(output_columns); - let mut columns = columns.into_iter().collect::>(); - columns.sort(); - columns - } else { - let output_projection = - PushDownInfo::projection_of_push_downs(&self.schema(), push_down.as_ref()); - let (_, columns) = output_projection.to_arrow_projection(&self.schema_descr); - columns - }; - let num_columns_to_read = columns_to_read.len(); - - let mut partitions = Partitions::default(); - let mut stats = PartStatistics::default(); - - let fast_read_bytes = ctx.get_settings().get_parquet_fast_read_bytes()?; - let rowgroup_hint_bytes = ctx.get_settings().get_parquet_rowgroup_hint_bytes()?; - - let mut large_files = vec![]; - let mut small_files = vec![]; - for (location, size, dedup_key) in file_locations.into_iter() { - if size > fast_read_bytes { - large_files.push((location, size, dedup_key)); - } else if size > 0 { - small_files.push((location, size, dedup_key)); - } - } - - collect_file_parts( - large_files, + collect_parts( + ctx, + files, self.compression_ratio, - &mut partitions, - &mut stats, num_columns_to_read, self.schema().num_fields(), - rowgroup_hint_bytes, - ); - - if !small_files.is_empty() { - let mut max_compression_ratio = self.compression_ratio; - let mut max_compressed_size = 0u64; - for part in partitions.partitions.iter() { - let p = 
part.as_any().downcast_ref::().unwrap(); - max_compression_ratio = max_compression_ratio - .max(p.uncompressed_size() as f64 / p.compressed_size() as f64); - max_compressed_size = max_compressed_size.max(p.compressed_size()); - } - - collect_small_file_parts( - small_files, - max_compression_ratio, - max_compressed_size, - &mut partitions, - &mut stats, - ); - } - - Ok((stats, partitions)) + ) } } diff --git a/src/query/storages/parquet/src/parquet_variant_table/mod.rs b/src/query/storages/parquet/src/parquet_variant_table/mod.rs new file mode 100644 index 0000000000000..e972685c912c5 --- /dev/null +++ b/src/query/storages/parquet/src/parquet_variant_table/mod.rs @@ -0,0 +1,19 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +mod recordbatch_to_variant; +mod source; +mod table; + +pub use table::ParquetVariantTable; diff --git a/src/query/storages/parquet/src/parquet_variant_table/recordbatch_to_variant.rs b/src/query/storages/parquet/src/parquet_variant_table/recordbatch_to_variant.rs new file mode 100644 index 0000000000000..c8a001556a898 --- /dev/null +++ b/src/query/storages/parquet/src/parquet_variant_table/recordbatch_to_variant.rs @@ -0,0 +1,62 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use arrow_array::RecordBatch; +use databend_common_expression::types::binary::BinaryColumnBuilder; +use databend_common_expression::types::variant::cast_scalar_to_variant; +use databend_common_expression::BlockEntry; +use databend_common_expression::Column; +use databend_common_expression::DataBlock; +use databend_common_expression::DataSchema; +use databend_common_expression::TableDataType; +use databend_common_expression::Value; +use jiff::tz::TimeZone; + +pub fn read_record_batch( + record_batch: RecordBatch, + builder: &mut BinaryColumnBuilder, + tz: &TimeZone, + typ: &TableDataType, +) -> databend_common_exception::Result<()> { + let schema = DataSchema::try_from(record_batch.schema().as_ref())?; + let mut columns = Vec::with_capacity(record_batch.columns().len()); + for (array, field) in record_batch.columns().iter().zip(schema.fields()) { + columns.push(Column::from_arrow_rs(array.clone(), field.data_type())?) 
+ } + let column = Column::Tuple(columns); + for scalar in column.iter() { + cast_scalar_to_variant(scalar, tz, &mut builder.data, Some(typ)); + builder.commit_row() + } + Ok(()) +} + +pub fn record_batch_to_block( + record_batch: RecordBatch, + tz: &TimeZone, + typ: &TableDataType, +) -> databend_common_exception::Result { + let mut builder = BinaryColumnBuilder::with_capacity( + record_batch.num_rows(), + record_batch.get_array_memory_size(), + ); + read_record_batch(record_batch, &mut builder, tz, typ)?; + let column = builder.build(); + let num_rows = column.len(); + let entry = BlockEntry::new( + databend_common_expression::types::DataType::Variant, + Value::Column(Column::Variant(column)), + ); + Ok(DataBlock::new(vec![entry], num_rows)) +} diff --git a/src/query/storages/parquet/src/parquet_variant_table/source.rs b/src/query/storages/parquet/src/parquet_variant_table/source.rs new file mode 100644 index 0000000000000..32a94c24b9f14 --- /dev/null +++ b/src/query/storages/parquet/src/parquet_variant_table/source.rs @@ -0,0 +1,403 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::any::Any; +use std::collections::VecDeque; +use std::sync::Arc; + +use bytes::Bytes; +use databend_common_base::base::Progress; +use databend_common_base::base::ProgressValues; +use databend_common_base::runtime::profile::Profile; +use databend_common_base::runtime::profile::ProfileStatisticsName; +use databend_common_catalog::plan::InternalColumnType; +use databend_common_catalog::query_kind::QueryKind; +use databend_common_catalog::table_context::TableContext; +use databend_common_exception::ErrorCode; +use databend_common_exception::Result; +use databend_common_expression::types::binary::BinaryColumnBuilder; +use databend_common_expression::types::DataType; +use databend_common_expression::types::NumberColumnBuilder; +use databend_common_expression::types::NumberDataType; +use databend_common_expression::BlockEntry; +use databend_common_expression::Column; +use databend_common_expression::DataBlock; +use databend_common_expression::Scalar; +use databend_common_expression::TableDataType; +use databend_common_expression::TableSchema; +use databend_common_expression::Value; +use databend_common_pipeline_core::processors::Event; +use databend_common_pipeline_core::processors::OutputPort; +use databend_common_pipeline_core::processors::Processor; +use databend_common_pipeline_core::processors::ProcessorPtr; +use databend_common_storage::CopyStatus; +use databend_common_storage::FileStatus; +use databend_common_storage::OperatorRegistry; +use jiff::tz::TimeZone; +use parquet::arrow::arrow_reader::ArrowReaderOptions; +use parquet::arrow::arrow_reader::ParquetRecordBatchReader; +use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; +use parquet::arrow::parquet_to_arrow_field_levels; +use parquet::arrow::parquet_to_arrow_schema; +use parquet::arrow::ProjectionMask; + +use crate::meta::read_metadata_async_cached; +use crate::parquet_reader::cached_range_full_read; +use crate::parquet_reader::InMemoryRowGroup; +use 
crate::parquet_variant_table::recordbatch_to_variant::read_record_batch; +use crate::parquet_variant_table::recordbatch_to_variant::record_batch_to_block; +use crate::read_settings::ReadSettings; +use crate::schema::arrow_to_table_schema; +use crate::ParquetFilePart; +use crate::ParquetPart; + +enum State { + Init, + // Reader, start row, location + ReadRowGroup { + readers: VecDeque<(ParquetRecordBatchReader, u64, TableDataType)>, + location: String, + }, + ReadFiles(Vec<(Bytes, String)>), +} + +pub struct ParquetVariantSource { + output: Arc, + scan_progress: Arc, + + // Used for event transforming. + ctx: Arc, + generated_data: Option, + is_finished: bool, + + state: State, + // If the source is used for a copy pipeline, + // we should update copy status when reading small parquet files. + // (Because we cannot collect copy status of small parquet files during `read_partition`). + is_copy: bool, + copy_status: Arc, + internal_columns: Vec, + op_registry: Arc, + batch_size: usize, + + tz: TimeZone, +} + +impl ParquetVariantSource { + #[allow(clippy::too_many_arguments)] + pub fn try_create( + ctx: Arc, + output: Arc, + internal_columns: Vec, + op_registry: Arc, + ) -> Result { + let scan_progress = ctx.get_scan_progress(); + let is_copy = matches!(ctx.get_query_kind(), QueryKind::CopyIntoTable); + let copy_status = ctx.get_copy_status(); + + let settings = ctx.get_settings(); + let tz_string = settings.get_timezone()?; + let tz = TimeZone::get(&tz_string).map_err(|e| { + ErrorCode::InvalidTimezone(format!("[QUERY-CTX] Timezone validation failed: {}", e)) + })?; + + Ok(ProcessorPtr::create(Box::new(Self { + output, + scan_progress, + ctx, + generated_data: None, + is_finished: false, + state: State::Init, + is_copy, + copy_status, + internal_columns, + op_registry, + batch_size: 1000, + tz, + }))) + } +} + +#[async_trait::async_trait] +impl Processor for ParquetVariantSource { + fn name(&self) -> String { + "ParquetSource".to_string() + } + + fn as_any(&mut self) 
-> &mut dyn Any {
+        self
+    }
+
+    /// Picks the next scheduling step for this source.
+    ///
+    /// Returns `Finished` once `is_finished` is set or the output port is finished, and
+    /// `NeedConsume` while the output cannot accept data or right after a pending block was
+    /// pushed. With nothing pending it returns `Async` in state `Init` and `Sync` in the two
+    /// reading states.
+    fn event(&mut self) -> Result<Event> {
+        if self.is_finished {
+            self.output.finish();
+            return Ok(Event::Finished);
+        }
+
+        if self.output.is_finished() {
+            return Ok(Event::Finished);
+        }
+
+        if !self.output.can_push() {
+            return Ok(Event::NeedConsume);
+        }
+
+        match self.generated_data.take() {
+            None => match &self.state {
+                State::Init => Ok(Event::Async),
+                State::ReadRowGroup { .. } => Ok(Event::Sync),
+                State::ReadFiles(_) => Ok(Event::Sync),
+            },
+            Some(data_block) => {
+                // Record rows/bytes in the scan progress and the profile before the block is
+                // moved into the output port.
+                let bytes = data_block.memory_size();
+                let progress_values = ProgressValues {
+                    rows: data_block.num_rows(),
+                    bytes,
+                };
+                self.scan_progress.incr(&progress_values);
+                Profile::record_usize_profile(ProfileStatisticsName::ScanBytes, bytes);
+                self.output.push_data(Ok(data_block));
+                Ok(Event::NeedConsume)
+            }
+        }
+    }
+
+    /// Runs one synchronous decoding step for the current reading state.
+    ///
+    /// * `ReadRowGroup`: decodes the next batch of the front row-group reader into a block,
+    ///   appends the internal columns and stores it in `generated_data`; an exhausted reader
+    ///   is removed from the queue. With no readers left the state stays `Init`.
+    /// * `ReadFiles`: decodes every buffered small file into one block each and stores their
+    ///   concatenation in `generated_data`.
+    ///
+    /// Must not be called in state `Init` (hits `unreachable!`).
+    fn process(&mut self) -> Result<()> {
+        // `State::Init` stays in place while the taken state is processed; each arm puts
+        // back the state it wants to continue with.
+        match std::mem::replace(&mut self.state, State::Init) {
+            State::ReadRowGroup {
+                readers: mut vs,
+                location,
+            } => {
+                // Bind `start_row` by reference. A `mut start_row` binding would copy the
+                // counter, and the offset advanced by `add_internal_columns` would be lost
+                // for the next batch of the same row group.
+                if let Some((reader, start_row, typ)) = vs.front_mut() {
+                    if let Some(batch) = reader.next() {
+                        let mut block = record_batch_to_block(batch?, &self.tz, typ)?;
+                        add_internal_columns(
+                            &self.internal_columns,
+                            location.clone(),
+                            &mut block,
+                            start_row,
+                        );
+
+                        if self.is_copy {
+                            self.copy_status.add_chunk(location.as_str(), FileStatus {
+                                num_rows_loaded: block.num_rows(),
+                                error: None,
+                            });
+                        }
+                        self.generated_data = Some(block);
+                    } else {
+                        vs.pop_front();
+                    }
+                    self.state = State::ReadRowGroup {
+                        readers: vs,
+                        location,
+                    };
+                }
+                // Else: all readers are finished. The state stays `Init`, so the next
+                // `event` asks for `async_process`, which fetches another partition.
+            }
+            State::ReadFiles(buffers) => {
+                let mut blocks = Vec::with_capacity(buffers.len());
+                for (buffer, path) in buffers {
+                    let mut block = read_small_file(buffer, self.batch_size, &self.tz)?;
+
+                    if self.is_copy {
+                        self.copy_status.add_chunk(path.as_str(), FileStatus {
+                            num_rows_loaded: block.num_rows(),
+                            error: None,
+                        });
+                    }
+                    // Each small file is decoded as a whole, so its rows are numbered from 0.
+                    let mut rows_start = 0;
+                    add_internal_columns(
+                        &self.internal_columns,
+                        path.to_string(),
+                        &mut block,
+                        &mut rows_start,
+                    );
+                    blocks.push(block);
+                }
+
+                if !blocks.is_empty() {
+                    self.generated_data = Some(DataBlock::concat(&blocks)?);
+                }
+                // Else: no output data is generated.
+            }
+            _ => unreachable!(),
+        }
+        Ok(())
+    }
+
+    /// Fetches the next partition and prepares the matching reading state.
+    ///
+    /// * `ParquetPart::SmallFiles`: reads all files fully and concurrently, then switches to
+    ///   `State::ReadFiles`.
+    /// * `ParquetPart::File`: builds the row-group readers and switches to
+    ///   `State::ReadRowGroup`, unless no reader was produced (the state then stays `Init`).
+    ///
+    /// Sets `is_finished` when the context has no partition left. Must only be called in
+    /// state `Init`.
+    #[async_backtrace::framed]
+    async fn async_process(&mut self) -> Result<()> {
+        match std::mem::replace(&mut self.state, State::Init) {
+            State::Init => {
+                if let Some(part) = self.ctx.get_partition() {
+                    match ParquetPart::from_part(&part)? {
+                        ParquetPart::SmallFiles(parts) => {
+                            // Read the small files in parallel.
+                            let mut handlers = Vec::with_capacity(parts.len());
+                            for part in parts {
+                                let (op, path) =
+                                    self.op_registry.get_operator_path(part.file.as_str())?;
+                                handlers.push(async move {
+                                    let bs = cached_range_full_read(
+                                        &op,
+                                        path,
+                                        part.compressed_size as _,
+                                        false,
+                                    )
+                                    .await?;
+                                    Ok::<_, ErrorCode>((bs, path.to_owned()))
+                                });
+                            }
+                            let results = futures::future::try_join_all(handlers).await?;
+                            self.state = State::ReadFiles(results);
+                        }
+                        ParquetPart::File(part) => {
+                            let readers = self.get_row_group_readers(part).await?;
+                            if !readers.is_empty() {
+                                self.state = State::ReadRowGroup {
+                                    readers,
+                                    location: part.file.clone(),
+                                };
+                            }
+                        }
+                        _ => unreachable!(),
+                    }
+                } else {
+                    self.is_finished = true;
+                }
+            }
+            _ => unreachable!(),
+        }
+
+        Ok(())
+    }
+}
+
+impl ParquetVariantSource {
+    /// Builds one record-batch reader per selected row group of `part`.
+    ///
+    /// Every entry holds the reader, the file-relative index of the row group's first row,
+    /// and the file schema expressed as a tuple type. When `part.bucket_option` is
+    /// `Some((bucket, bucket_num))`, only row groups whose index modulo `bucket_num` equals
+    /// `bucket` are read.
+    async fn get_row_group_readers(
+        &mut self,
+        part: &ParquetFilePart,
+    ) -> Result<VecDeque<(ParquetRecordBatchReader, u64, TableDataType)>> {
+        let (op, path) = self.op_registry.get_operator_path(part.file.as_str())?;
+        let meta =
+            read_metadata_async_cached(path, &op, Some(part.compressed_size), &part.dedup_key)
+                .await?;
+        let field_levels = parquet_to_arrow_field_levels(
+            meta.file_metadata().schema_descr(),
+            ProjectionMask::all(),
+            None,
+        )?;
+        let arrow_schema = parquet_to_arrow_schema(
+            meta.file_metadata().schema_descr(),
+            meta.file_metadata().key_value_metadata(),
+        )?;
+        let schema = arrow_to_table_schema(&arrow_schema, true)?;
+        let typ = schema_to_tuple_type(&schema);
+
+        let should_read = |rowgroup_idx: usize, bucket_option: Option<(usize, usize)>| -> bool {
+            if let Some((bucket, bucket_num)) = bucket_option {
+                return rowgroup_idx % bucket_num == bucket;
+            }
+            true
+        };
+
+        let mut next_start_row = 0u64;
+        let mut readers = VecDeque::with_capacity(meta.num_row_groups());
+        for (rowgroup_idx, rg) in meta.row_groups().iter().enumerate() {
+            // Offset of this row group's first row. The running total is advanced afterwards,
+            // also for skipped row groups, so later offsets stay file-relative.
+            let start_row = next_start_row;
+            next_start_row += rg.num_rows() as u64;
+            // filter by bucket option
+            if !should_read(rowgroup_idx, part.bucket_option) {
+                continue;
+            }
+            let mut row_group =
+                InMemoryRowGroup::new(&part.file, op.clone(), rg, None, ReadSettings::default());
+            row_group.fetch(&ProjectionMask::all(), None).await?;
+            let reader = ParquetRecordBatchReader::try_new_with_row_groups(
+                &field_levels,
+                &row_group,
+                self.batch_size,
+                None,
+            )?;
+            readers.push_back((reader, start_row, typ.clone()));
+        }
+        Ok(readers)
+    }
+}
+
+/// Describes a whole table schema as a single tuple type, with one tuple field per schema
+/// field (same names, same data types, same order).
+fn schema_to_tuple_type(schema: &TableSchema) -> TableDataType {
+    TableDataType::Tuple {
+        fields_name: schema.fields.iter().map(|f| f.name.clone()).collect(),
+        fields_type: schema.fields.iter().map(|f| f.data_type.clone()).collect(),
+    }
+}
+
+/// Decodes a complete in-memory Parquet file into a block with a single `Variant` column.
+///
+/// All record batches of the file are appended to one column builder through
+/// `read_record_batch`; `batch_size` is the reader's batch size.
+///
+/// # Errors
+///
+/// Fails when the Parquet reader cannot be built, the Arrow schema cannot be converted to a
+/// table schema, or a batch fails to decode or convert.
+pub fn read_small_file(
+    bytes: Bytes,
+    batch_size: usize,
+    tz: &TimeZone,
+) -> databend_common_exception::Result<DataBlock> {
+    let len = bytes.len();
+    let builder =
+        ParquetRecordBatchReaderBuilder::try_new_with_options(bytes, ArrowReaderOptions::new())?
+            .with_batch_size(batch_size);
+
+    // The tuple type derived from the file schema is handed to `read_record_batch` with
+    // every batch.
+    let schema = arrow_to_table_schema(builder.schema(), true)?;
+    let typ = schema_to_tuple_type(&schema);
+    let reader = builder.build()?;
+    let mut builder = BinaryColumnBuilder::with_capacity(batch_size, len);
+    for batch in reader {
+        let batch = batch?;
+        read_record_batch(batch, &mut builder, tz, &typ)?;
+    }
+    let column = builder.build();
+    let num_rows = column.len();
+    let entry = BlockEntry::new(DataType::Variant, Value::Column(Column::Variant(column)));
+    Ok(DataBlock::new(vec![entry], num_rows))
+}
+
+/// Appends the requested internal columns to `b`, in the order given.
+///
+/// * `FileName`: a constant string scalar holding `path`.
+/// * `FileRowNumber`: a `UInt64` column numbering the block's rows from `*start_row`;
+///   `*start_row` is then advanced past the block.
+///
+/// # Panics
+///
+/// Hits `unreachable!` for any other internal column type.
+fn add_internal_columns(
+    internal_columns: &[InternalColumnType],
+    path: String,
+    b: &mut DataBlock,
+    start_row: &mut u64,
+) {
+    for c in internal_columns {
+        match c {
+            InternalColumnType::FileName => {
+                b.add_column(BlockEntry::new(
+                    DataType::String,
+                    Value::Scalar(Scalar::String(path.clone())),
+                ));
+            }
+            InternalColumnType::FileRowNumber => {
+                let end_row = (*start_row) + b.num_rows() as u64;
+                b.add_column(BlockEntry::new(
+                    DataType::Number(NumberDataType::UInt64),
+                    Value::Column(Column::Number(
+                        NumberColumnBuilder::UInt64(((*start_row)..end_row).collect::<Vec<_>>())
+                            .build(),
+                    )),
+                ));
+                *start_row = end_row;
+            }
+            _ => {
+                unreachable!()
+            }
+        }
+    }
+}
diff --git a/src/query/storages/parquet/src/parquet_variant_table/table.rs b/src/query/storages/parquet/src/parquet_variant_table/table.rs
new file mode 100644
index 0000000000000..05e0747551591
--- /dev/null
+++ b/src/query/storages/parquet/src/parquet_variant_table/table.rs
@@ -0,0 +1,95 @@
+// Copyright 2021 Datafuse Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::Arc;
+
+use databend_common_catalog::plan::DataSourceInfo;
+use databend_common_catalog::plan::DataSourcePlan;
+use databend_common_catalog::plan::PartStatistics;
+use databend_common_catalog::plan::Partitions;
+use databend_common_catalog::plan::StageTableInfo;
+use databend_common_catalog::table_context::TableContext;
+use databend_common_exception::ErrorCode;
+use databend_common_exception::Result;
+use databend_common_pipeline_core::Pipeline;
+use databend_common_storage::init_stage_operator;
+
+use crate::parquet_part::collect_parts;
+use crate::parquet_variant_table::source::ParquetVariantSource;
+
+/// Entry points for reading staged Parquet files through `ParquetVariantSource`.
+pub struct ParquetVariantTable {}
+
+impl ParquetVariantTable {
+    /// Lists the stage files and groups them into read partitions.
+    ///
+    /// Files with size 0 are dropped before partitioning. The listing uses the
+    /// `max_threads` setting as its thread count.
+    #[async_backtrace::framed]
+    pub async fn do_read_partitions(
+        stage_table_info: &StageTableInfo,
+        ctx: Arc<dyn TableContext>,
+    ) -> Result<(PartStatistics, Partitions)> {
+        let operator = init_stage_operator(&stage_table_info.stage_info)?;
+        let thread_num = ctx.get_settings().get_max_threads()? as usize;
+        let files = stage_table_info
+            .files_info
+            .list(&operator, thread_num, None)
+            .await?
+            .into_iter()
+            .filter(|f| f.size > 0)
+            .map(|f| (f.path.clone(), f.size, f.dedup_key()))
+            .collect::<Vec<_>>();
+        // NOTE(review): the meaning of the literal arguments (1.0, 1, 1) is defined by
+        // `collect_parts`, which is not visible here; confirm before changing them.
+        collect_parts(ctx, files, 1.0, 1, 1)
+    }
+
+    /// Adds `max_threads` `ParquetVariantSource` processors to `pipeline` for the stage
+    /// described by `plan`, after registering `plan.parts` as the context's partitions.
+    ///
+    /// `_put_cache` is ignored.
+    ///
+    /// # Errors
+    ///
+    /// Returns `ErrorCode::Internal` when `plan.source_info` is not a
+    /// `DataSourceInfo::StageSource`; also fails when the settings, the stage operator, the
+    /// partition registration or the source creation fail.
+    pub fn do_read_data(
+        ctx: Arc<dyn TableContext>,
+        plan: &DataSourcePlan,
+        pipeline: &mut Pipeline,
+        _put_cache: bool,
+    ) -> Result<()> {
+        let settings = ctx.get_settings();
+        let max_threads = settings.get_max_threads()? as usize;
+        let stage_table_info =
+            if let DataSourceInfo::StageSource(stage_table_info) = &plan.source_info {
+                stage_table_info
+            } else {
+                return Err(ErrorCode::Internal(
+                    "bug: ParquetVariantTable::read_data must be called with StageSource",
+                ));
+            };
+        // Internal columns requested by the plan; empty when the plan has none.
+        let internal_columns = plan
+            .internal_columns
+            .as_ref()
+            .map(|m| {
+                m.values()
+                    .map(|i| i.column_type.clone())
+                    .collect::<Vec<_>>()
+            })
+            .unwrap_or_default();
+
+        let operator = Arc::new(init_stage_operator(&stage_table_info.stage_info)?);
+
+        ctx.set_partitions(plan.parts.clone())?;
+
+        pipeline.add_source(
+            |output| {
+                ParquetVariantSource::try_create(
+                    ctx.clone(),
+                    output,
+                    internal_columns.clone(),
+                    operator.clone(),
+                )
+            },
+            max_threads,
+        )?;
+        Ok(())
+    }
+}
diff --git a/src/query/storages/stage/src/stage_table.rs b/src/query/storages/stage/src/stage_table.rs
index b9131a5e9c428..f48fe89b398c4 100644
--- a/src/query/storages/stage/src/stage_table.rs
+++ b/src/query/storages/stage/src/stage_table.rs
@@ -39,6 +39,7 @@ use databend_common_storage::init_stage_operator;
 use databend_common_storage::StageFileInfo;
 use databend_common_storages_orc::OrcTableForCopy;
 use databend_common_storages_parquet::ParquetTableForCopy;
+use databend_common_storages_parquet::ParquetVariantTable;
 use databend_storages_common_stage::SingleFilePartition;
 use databend_storages_common_table_meta::meta::TableMetaTimestamps;
 use opendal::Operator;
@@ -158,7 +159,12 @@ impl Table for StageTable {
         let stage_table_info = &self.table_info;
         match stage_table_info.stage_info.file_format_params {
             FileFormatParams::Parquet(_) => {
-                ParquetTableForCopy::do_read_partitions(stage_table_info, ctx, _push_downs).await
+                if stage_table_info.is_variant {
+                    ParquetVariantTable::do_read_partitions(stage_table_info, ctx).await
+                } else {
+                    ParquetTableForCopy::do_read_partitions(stage_table_info, ctx, _push_downs)
+                        .await
+                }
             }
 
             FileFormatParams::Orc(_) => {
@@ -199,7 +205,11 @@ impl Table for StageTable {
         };
         match
stage_table_info.stage_info.file_format_params { FileFormatParams::Parquet(_) => { - ParquetTableForCopy::do_read_data(ctx, plan, pipeline, _put_cache) + if stage_table_info.is_variant { + ParquetVariantTable::do_read_data(ctx, plan, pipeline, _put_cache) + } else { + ParquetTableForCopy::do_read_data(ctx, plan, pipeline, _put_cache) + } } FileFormatParams::Orc(_) => { OrcTableForCopy::do_read_data(ctx, plan, pipeline, _put_cache) diff --git a/tests/data/parquet/alltypes_plain.parquet.csv b/tests/data/parquet/alltypes_plain.parquet.csv deleted file mode 100644 index b9e853b326253..0000000000000 --- a/tests/data/parquet/alltypes_plain.parquet.csv +++ /dev/null @@ -1,9 +0,0 @@ -id,bool_col,tinyint_col,smallint_col,int_col,bigint_col,float_col,double_col,date_string_col,string_col,timestamp_col -4,True,0,0,0,0,0.0,0.0,b'03/01/09',b'0',2009-03-01 00:00:00 -5,False,1,1,1,10,1.1,10.1,b'03/01/09',b'1',2009-03-01 00:01:00 -6,True,0,0,0,0,0.0,0.0,b'04/01/09',b'0',2009-04-01 00:00:00 -7,False,1,1,1,10,1.1,10.1,b'04/01/09',b'1',2009-04-01 00:01:00 -2,True,0,0,0,0,0.0,0.0,b'02/01/09',b'0',2009-02-01 00:00:00 -3,False,1,1,1,10,1.1,10.1,b'02/01/09',b'1',2009-02-01 00:01:00 -0,True,0,0,0,0,0.0,0.0,b'01/01/09',b'0',2009-01-01 00:00:00 -1,False,1,1,1,10,1.1,10.1,b'01/01/09',b'1',2009-01-01 00:01:00 diff --git a/tests/sqllogictests/suites/base/03_common/03_0028_copy_into_stage.test b/tests/sqllogictests/suites/base/03_common/03_0028_copy_into_stage.test index ecbbcba0f316e..def4322f3fb1a 100644 --- a/tests/sqllogictests/suites/base/03_common/03_0028_copy_into_stage.test +++ b/tests/sqllogictests/suites/base/03_common/03_0028_copy_into_stage.test @@ -52,7 +52,7 @@ statement ok with S as (select number from numbers(1000) where number > 100) COPY INTO @hello from (select number from numbers(1) where number not in (SELECT number FROM S)) FILE_FORMAT = (type = parquet) query I -select sum($1) from @hello; +select sum(number) from @hello; ---- 45 diff --git 
a/tests/sqllogictests/suites/base/09_fuse_engine/09_0026_merge_into.test b/tests/sqllogictests/suites/base/09_fuse_engine/09_0026_merge_into.test index bbff8a97d17ab..55a566b530543 100644 --- a/tests/sqllogictests/suites/base/09_fuse_engine/09_0026_merge_into.test +++ b/tests/sqllogictests/suites/base/09_fuse_engine/09_0026_merge_into.test @@ -361,7 +361,7 @@ statement ok copy into @s7_merge_into from (select a,b,c from test_stage order by a,b,c); query TTT -select $1,$2,$3 from @s7_merge_into order by $1,$2,$3; +select a,b,c from @s7_merge_into order by a,b,c; ---- 1 a1 b1 2 a2 b2 @@ -377,7 +377,7 @@ select * from target_table order by a,b,c; 2 a_2 b_2 query TT -merge into target_table using (select $1,$2,$3 from @s7_merge_into) as cdc on cdc.$1 = target_table.a when matched then delete when not matched then insert values(cdc.$1,cdc.$2,cdc.$3); +merge into target_table using (select a,b,c from @s7_merge_into) as cdc on cdc.a = target_table.a when matched then delete when not matched then insert values(cdc.a,cdc.b,cdc.c); ---- 1 2 diff --git a/tests/sqllogictests/suites/base/09_fuse_engine/09_0034_pr13848_without_distributed_enable.test b/tests/sqllogictests/suites/base/09_fuse_engine/09_0034_pr13848_without_distributed_enable.test index e0773aaa2c04b..f95842170a277 100644 --- a/tests/sqllogictests/suites/base/09_fuse_engine/09_0034_pr13848_without_distributed_enable.test +++ b/tests/sqllogictests/suites/base/09_fuse_engine/09_0034_pr13848_without_distributed_enable.test @@ -115,7 +115,7 @@ copy into @parquet_table0 from (select a,b from merge_source_0 limit 2) ## test agg statement error 1065 -copy into copy_table_test0 from (select avg($1) as a,'b' as b from @parquet_table0); +copy into copy_table_test0 from (select avg(a) as a,'b' as b from @parquet_table0); ## test window statement error 1065 diff --git a/tests/sqllogictests/suites/base/09_fuse_engine/09_0036_merge_into_without_distributed_enable.test 
b/tests/sqllogictests/suites/base/09_fuse_engine/09_0036_merge_into_without_distributed_enable.test index 485cf289a89e1..eba63c64d5a0f 100644 --- a/tests/sqllogictests/suites/base/09_fuse_engine/09_0036_merge_into_without_distributed_enable.test +++ b/tests/sqllogictests/suites/base/09_fuse_engine/09_0036_merge_into_without_distributed_enable.test @@ -358,7 +358,7 @@ statement ok copy into @s4_merge_into from (select a,b,c from test_stage order by a,b,c); query TTT -select $1,$2,$3 from @s4_merge_into order by $1,$2,$3; +select a,b,c from @s4_merge_into order by a,b,c; ---- 1 a1 b1 2 a2 b2 @@ -374,7 +374,7 @@ select * from target_table order by a,b,c; 2 a_2 b_2 query TT -merge into target_table using (select $1,$2,$3 from @s4_merge_into) as cdc on cdc.$1 = target_table.a when matched then delete when not matched then insert values(cdc.$1,cdc.$2,cdc.$3); +merge into target_table using (select a,b,c from @s4_merge_into) as cdc on cdc.a = target_table.a when matched then delete when not matched then insert values(cdc.a,cdc.b,cdc.c); ---- 1 2 diff --git a/tests/sqllogictests/suites/mode/cluster/merge_into_non_equal_distributed.test b/tests/sqllogictests/suites/mode/cluster/merge_into_non_equal_distributed.test index 43cfa9e7c007e..3eca58d64c2ab 100644 --- a/tests/sqllogictests/suites/mode/cluster/merge_into_non_equal_distributed.test +++ b/tests/sqllogictests/suites/mode/cluster/merge_into_non_equal_distributed.test @@ -296,4 +296,4 @@ select * from t1 order by a; 10 statement ok -set enable_distributed_merge_into = 0; +set enable_distributed_merge_into = 0; \ No newline at end of file diff --git a/tests/sqllogictests/suites/stage/empty_file.test b/tests/sqllogictests/suites/stage/empty_file.test index 070420ba15f19..d8ea04dfde2c9 100644 --- a/tests/sqllogictests/suites/stage/empty_file.test +++ b/tests/sqllogictests/suites/stage/empty_file.test @@ -9,7 +9,7 @@ select $1 from @data (files=>('csv/it.csv', 'empty.txt'), file_format=>'csv') 2 query ok -select $1 from 
@data (files=>('parquet/ii/f2.parquet', 'empty.txt'), file_format=>'parquet'); +select id from @data (files=>('parquet/ii/f2.parquet', 'empty.txt'), file_format=>'parquet'); ---- 3 4 diff --git a/tests/sqllogictests/suites/stage/formats/parquet/error_diff_schema.test b/tests/sqllogictests/suites/stage/formats/parquet/error_diff_schema.test index a149d16209e8d..d8485bd8be4ea 100644 --- a/tests/sqllogictests/suites/stage/formats/parquet/error_diff_schema.test +++ b/tests/sqllogictests/suites/stage/formats/parquet/error_diff_schema.test @@ -1,5 +1,5 @@ query error diff schema -select $1 from @data/parquet/ (files=>('tuple.parquet', 'complex.parquet')) +select id from @data/parquet/ (files=>('tuple.parquet', 'complex.parquet')) statement ok create or replace table t1 (id int, t TUPLE(A INT32, B STRING)); diff --git a/tests/sqllogictests/suites/stage/formats/parquet/options/select_options.test b/tests/sqllogictests/suites/stage/formats/parquet/options/select_options.test index 7907a928e1971..daafd2721f69f 100644 --- a/tests/sqllogictests/suites/stage/formats/parquet/options/select_options.test +++ b/tests/sqllogictests/suites/stage/formats/parquet/options/select_options.test @@ -1,5 +1,5 @@ query -select $1 from @data/parquet/ (files => ('alltypes_plain.parquet')) a; +select id from @data/parquet/ (files => ('alltypes_plain.parquet')) a; ---- 4 5 diff --git a/tests/sqllogictests/suites/stage/formats/parquet/parquet_metadata.test b/tests/sqllogictests/suites/stage/formats/parquet/parquet_metadata.test index 4966463ad48fc..245d22447a8b3 100644 --- a/tests/sqllogictests/suites/stage/formats/parquet/parquet_metadata.test +++ b/tests/sqllogictests/suites/stage/formats/parquet/parquet_metadata.test @@ -1,5 +1,5 @@ query ok -select metadata$filename, $2, metadata$file_row_number, $1 from @data_s3/parquet/ii/ order by metadata$filename, $2 limit 3; +select metadata$filename, c1, metadata$file_row_number, id from @data_s3/parquet/ii/ order by metadata$filename, c1 limit 3; ---- 
parquet/ii/f1.parquet 1 0 1 parquet/ii/f1.parquet 2 1 2 @@ -7,7 +7,7 @@ parquet/ii/f2.parquet 3 0 3 query ok -select metadata$filename, $2, metadata$file_row_number, $1 from @data_s3/parquet/ii/ where metadata$file_row_number = 0 and $1 > 1; +select metadata$filename, c1, metadata$file_row_number, id from @data_s3/parquet/ii/ where metadata$file_row_number = 0 and id > 1; ---- parquet/ii/f2.parquet 3 0 3 parquet/ii/f3.parquet 5 0 5 @@ -17,14 +17,14 @@ statement ok create or replace table t(file_name string, id string, row int) query ok -copy into t from (select metadata$filename, $2, metadata$file_row_number + 1 from @data_s3/parquet/ii/) +copy into t from (select metadata$filename, c1, metadata$file_row_number + 1 from @data_s3/parquet/ii/) ---- parquet/ii/f1.parquet 2 0 NULL NULL parquet/ii/f2.parquet 2 0 NULL NULL parquet/ii/f3.parquet 2 0 NULL NULL query ok -select * from t order by $1,$2 +select * from t order by file_name, id ---- parquet/ii/f1.parquet 1 1 parquet/ii/f1.parquet 2 2 diff --git a/tests/sqllogictests/suites/stage/formats/parquet/parquet_to_variant.test b/tests/sqllogictests/suites/stage/formats/parquet/parquet_to_variant.test new file mode 100644 index 0000000000000..cc2bda4c2caf5 --- /dev/null +++ b/tests/sqllogictests/suites/stage/formats/parquet/parquet_to_variant.test @@ -0,0 +1,142 @@ +statement ok +create or replace table t(a variant, b int, c string); + +query +copy into t from (select $1, metadata$file_row_number, metadata$filename from @data/parquet/tuple.parquet); +---- +parquet/tuple.parquet 3 0 NULL NULL + +query +select * from t order by b; +---- +{"id":1,"t":{"A":1,"B":"a"}} 0 parquet/tuple.parquet +{"id":2,"t":{"A":3,"B":"b"}} 1 parquet/tuple.parquet +{"id":3,"t":{"A":3,"B":"c"}} 2 parquet/tuple.parquet + +statement ok +create or replace stage s1; + +statement ok +create or replace table t1(a bool, b int); + +statement ok +insert into t1(b) values(1); + +statement ok +copy into @s1 from t1; + +query +select $1 from @s1; +---- 
+{"a":null,"b":1} + +query +select * from @s1; +---- +NULL 1 + +query +select $1, metadata$file_row_number as r, metadata$filename as f from @data/parquet/diff_schema/(pattern=>'.*[.]parquet') order by f,r; +---- +{"c1":110,"c2":120,"c3":130} 0 parquet/diff_schema/f1.parquet +{"c1":111,"c2":121,"c3":131} 1 parquet/diff_schema/f1.parquet +{"c1":112,"c2":122,"c3":132} 2 parquet/diff_schema/f1.parquet +{"c1":113,"c2":123,"c3":133} 3 parquet/diff_schema/f1.parquet +{"c1":114,"c2":124,"c3":134} 4 parquet/diff_schema/f1.parquet +{"c1":115,"c2":125,"c3":135} 5 parquet/diff_schema/f1.parquet +{"c1":116,"c2":126,"c3":136} 6 parquet/diff_schema/f1.parquet +{"c1":117,"c2":127,"c3":137} 7 parquet/diff_schema/f1.parquet +{"c1":118,"c2":128,"c3":138} 8 parquet/diff_schema/f1.parquet +{"c1":119,"c2":129,"c3":139} 9 parquet/diff_schema/f1.parquet +{"c2":220,"c4":"240","c5":250,"c6":260} 0 parquet/diff_schema/f2.parquet +{"c2":221,"c4":"241","c5":251,"c6":261} 1 parquet/diff_schema/f2.parquet +{"c2":222,"c4":"242","c5":252,"c6":262} 2 parquet/diff_schema/f2.parquet +{"c2":223,"c4":"243","c5":253,"c6":263} 3 parquet/diff_schema/f2.parquet +{"c2":224,"c4":"244","c5":254,"c6":264} 4 parquet/diff_schema/f2.parquet +{"c2":225,"c4":"245","c5":255,"c6":265} 5 parquet/diff_schema/f2.parquet +{"c2":226,"c4":"246","c5":256,"c6":266} 6 parquet/diff_schema/f2.parquet +{"c2":227,"c4":"247","c5":257,"c6":267} 7 parquet/diff_schema/f2.parquet +{"c2":228,"c4":"248","c5":258,"c6":268} 8 parquet/diff_schema/f2.parquet +{"c2":229,"c4":"249","c5":259,"c6":269} 9 parquet/diff_schema/f2.parquet + +query +select $1 from @data/parquet/tuple.parquet; +---- +{"id":1,"t":{"A":1,"B":"a"}} +{"id":2,"t":{"A":3,"B":"b"}} +{"id":3,"t":{"A":3,"B":"c"}} + +query +select $1 from @data/parquet/alltypes_plain.parquet; +---- +{"bigint_col":0,"bool_col":true,"date_string_col":"03/01/09","double_col":0.0,"float_col":0.0,"id":4,"int_col":0,"smallint_col":0,"string_col":"0","timestamp_col":"2009-03-01 
00:00:00.000000","tinyint_col":0} +{"bigint_col":10,"bool_col":false,"date_string_col":"03/01/09","double_col":10.1,"float_col":1.100000023841858,"id":5,"int_col":1,"smallint_col":1,"string_col":"1","timestamp_col":"2009-03-01 00:01:00.000000","tinyint_col":1} +{"bigint_col":0,"bool_col":true,"date_string_col":"04/01/09","double_col":0.0,"float_col":0.0,"id":6,"int_col":0,"smallint_col":0,"string_col":"0","timestamp_col":"2009-04-01 00:00:00.000000","tinyint_col":0} +{"bigint_col":10,"bool_col":false,"date_string_col":"04/01/09","double_col":10.1,"float_col":1.100000023841858,"id":7,"int_col":1,"smallint_col":1,"string_col":"1","timestamp_col":"2009-04-01 00:01:00.000000","tinyint_col":1} +{"bigint_col":0,"bool_col":true,"date_string_col":"02/01/09","double_col":0.0,"float_col":0.0,"id":2,"int_col":0,"smallint_col":0,"string_col":"0","timestamp_col":"2009-02-01 00:00:00.000000","tinyint_col":0} +{"bigint_col":10,"bool_col":false,"date_string_col":"02/01/09","double_col":10.1,"float_col":1.100000023841858,"id":3,"int_col":1,"smallint_col":1,"string_col":"1","timestamp_col":"2009-02-01 00:01:00.000000","tinyint_col":1} +{"bigint_col":0,"bool_col":true,"date_string_col":"01/01/09","double_col":0.0,"float_col":0.0,"id":0,"int_col":0,"smallint_col":0,"string_col":"0","timestamp_col":"2009-01-01 00:00:00.000000","tinyint_col":0} +{"bigint_col":10,"bool_col":false,"date_string_col":"01/01/09","double_col":10.1,"float_col":1.100000023841858,"id":1,"int_col":1,"smallint_col":1,"string_col":"1","timestamp_col":"2009-01-01 00:01:00.000000","tinyint_col":1} + +query error id +select $1,id from @data/parquet/tuple.parquet; + +query error position +select $2 from @data/parquet/tuple.parquet; + +query +select $1 from @data/parquet/complex.parquet; +---- +{"active":true,"address":[{"city":"Verona","country":"US","district":"DANE","extension":null,"id":null,"line":["1979 Milky 
Way",""],"period":null,"postalCode":"53593","state":"WI","text":null,"type":null,"use":"home"}],"birthDate":"1964-02-05","communication":[{"extension":null,"id":null,"language":{"coding":[{"code":"en","display":"English","extension":null,"id":null,"system":"urn:ietf:bcp:47","userSelected":null,"version":null}],"extension":null,"id":null,"text":"English"},"modifierExtension":null,"preferred":true}],"contact":[{"address":{"city":"Verona","country":"US","district":"DANE","extension":null,"id":null,"line":["1979 Milky Way",""],"period":null,"postalCode":"53593","state":"WI","text":null,"type":null,"use":"home"},"extension":null,"gender":null,"id":null,"modifierExtension":null,"name":{"extension":null,"family":null,"given":null,"id":null,"period":null,"prefix":null,"suffix":null,"text":"Jenny Inf Validate","use":"usual"},"organization":null,"period":null,"relationship":[{"coding":[{"code":"C","display":"Emergency Contact","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0131","userSelected":null,"version":null}],"extension":null,"id":null,"text":null}],"telecom":[{"extension":null,"id":null,"period":null,"rank":null,"system":"phone","use":"temp","value":"608-986-4254"}]}],"contained":null,"deceasedBoolean":false,"deceasedDateTime":null,"extension":["{\"extension\":[{\"valueCoding\":{\"system\":\"http://terminology.hl7.org/CodeSystem/v3-NullFlavor\",\"code\":\"UNK\",\"display\":\"Unknown\"},\"url\":\"ombCategory\"},{\"valueString\":\"Unknown\",\"url\":\"text\"}],\"url\":\"http://hl7.org/fhir/us/core/StructureDefinition/us-core-race\"}","{\"extension\":[{\"valueString\":\"Unknown\",\"url\":\"text\"}],\"url\":\"http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity\"}","{\"valueCodeableConcept\":{\"coding\":[{\"system\":\"urn:oid:1.2.840.114350.1.13.0.1.7.10.698084.130.657370.19999000\",\"code\":\"female\"}]},\"url\":\"http://open.epic.com/FHIR/StructureDefinition/extension/legal-sex\"}","{\"valueCodeableConcept\":{\"coding\":[{\"syst
em\":\"urn:oid:1.2.840.114350.1.13.0.1.7.10.698084.130.657370.19999000\",\"code\":\"female\"}]},\"url\":\"http://open.epic.com/FHIR/StructureDefinition/extension/sex-for-clinical-use\"}","{\"valueCode\":\"F\",\"url\":\"http://hl7.org/fhir/us/core/StructureDefinition/us-core-birthsex\"}"],"gender":"female","generalPractitioner":null,"id":"eTplvxRvcd-eT1nEI8BvQRQ3","identifier":[{"assigner":null,"extension":null,"id":null,"period":null,"system":"urn:oid:2.16.840.1.113883.4.1","type":null,"use":"usual","value":"234-98-7197"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"urn:oid:1.2.840.114350.1.1","type":{"coding":null,"extension":null,"id":null,"text":"EPIC"},"use":"usual","value":"E3480"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"urn:oid:1.2.840.114350.1.13.0.1.7.2.698084","type":{"coding":null,"extension":null,"id":null,"text":"EXTERNAL"},"use":"usual","value":"Z5337"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"http://open.epic.com/FHIR/StructureDefinition/patient-dstu2-fhir-id","type":{"coding":null,"extension":null,"id":null,"text":"FHIR"},"use":"usual","value":"TkxuGNMCpBMIkhR5bO0UFliCEuV9bS6E9zUmYP0aN7HgB"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"http://open.epic.com/FHIR/StructureDefinition/patient-fhir-id","type":{"coding":null,"extension":null,"id":null,"text":"FHIR STU3"},"use":"usual","value":"eTplvxRvcd-eT1nEI8BvQRQ3"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"urn:oid:1.2.840.114350.1.13.0.1.7.2.698084","type":{"coding":null,"extension":null,"id":null,"text":"INTERNAL"},"use":"usual","value":" 
Z5337"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"urn:oid:1.2.840.114350.1.13.0.1.7.5.737384.14","type":{"coding":null,"extension":null,"id":null,"text":"MRN"},"use":"usual","value":"203235"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"urn:oid:1.2.840.114350.1.13.0.1.7.3.878082.110","type":{"coding":null,"extension":null,"id":null,"text":"MYCHARTLOGIN"},"use":"usual","value":"ICONJENNIFER"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"urn:oid:1.2.840.114350.1.13.0.1.7.2.878082","type":{"coding":null,"extension":null,"id":null,"text":"WPRINTERNAL"},"use":"usual","value":"280"}],"implicitRules":null,"language":null,"link":null,"managingOrganization":{"display":"Epic Hospital System","extension":null,"id":null,"identifier":null,"reference":"Organization/enRyWnSP963FYDpoks4NHOA3","type":null},"maritalStatus":{"coding":null,"extension":null,"id":null,"text":"Unknown"},"meta":null,"modifierExtension":null,"multipleBirthBoolean":null,"multipleBirthInteger":null,"name":[{"extension":null,"family":"Infection Control","given":["Jennifer"],"id":null,"period":null,"prefix":null,"suffix":null,"text":"Jennifer Infection Control","use":"official"},{"extension":null,"family":"Infection Control","given":["Jennifer"],"id":null,"period":null,"prefix":null,"suffix":null,"text":"Jennifer Infection Control","use":"usual"}],"photo":null,"resourceType":"Patient","telecom":[{"extension":null,"id":null,"period":null,"rank":null,"system":"phone","use":"home","value":"608-986-4254"}],"text":null,"yy__us_core_birthsex":null,"yy__us_core_ethnicity":null,"yy__us_core_race":null,"yy__version":null} +{"active":null,"address":[{"city":"Attleboro","country":"US","district":null,"extension":["{\"url\":\"http://hl7.org/fhir/StructureDefinition/geolocation\",\"extension\":[{\"url\":\"latitude\",\"valueDecimal\":41.89010728818103},{\"url\":\"longitude\",\"valueDecimal\":-71.3059007728953}]}"],"id":null,"line":["776 Waelchi Overpass 
Suite 25"],"period":null,"postalCode":"02703","state":"MA","text":null,"type":null,"use":null}],"birthDate":"1976-12-16","communication":[{"extension":null,"id":null,"language":{"coding":[{"code":"en-US","display":"English","extension":null,"id":null,"system":"urn:ietf:bcp:47","userSelected":null,"version":null}],"extension":null,"id":null,"text":"English"},"modifierExtension":null,"preferred":null}],"contact":null,"contained":null,"deceasedBoolean":null,"deceasedDateTime":null,"extension":["{\"url\":\"http://hl7.org/fhir/us/core/StructureDefinition/us-core-race\",\"extension\":[{\"url\":\"ombCategory\",\"valueCoding\":{\"system\":\"urn:oid:2.16.840.1.113883.6.238\",\"code\":\"2106-3\",\"display\":\"White\"}},{\"url\":\"text\",\"valueString\":\"White\"}]}","{\"url\":\"http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity\",\"extension\":[{\"url\":\"ombCategory\",\"valueCoding\":{\"system\":\"urn:oid:2.16.840.1.113883.6.238\",\"code\":\"2186-5\",\"display\":\"Not Hispanic or Latino\"}},{\"url\":\"text\",\"valueString\":\"Not Hispanic or Latino\"}]}","{\"url\":\"http://hl7.org/fhir/StructureDefinition/patient-mothersMaidenName\",\"valueString\":\"Keli839 
Windler79\"}","{\"url\":\"http://hl7.org/fhir/us/core/StructureDefinition/us-core-birthsex\",\"valueCode\":\"M\"}","{\"url\":\"http://hl7.org/fhir/StructureDefinition/patient-birthPlace\",\"valueAddress\":{\"city\":\"Boston\",\"state\":\"Massachusetts\",\"country\":\"US\"}}","{\"url\":\"http://synthetichealth.github.io/synthea/disability-adjusted-life-years\",\"valueDecimal\":0.626048129961879}","{\"url\":\"http://synthetichealth.github.io/synthea/quality-adjusted-life-years\",\"valueDecimal\":43.37395187003812}"],"gender":"male","generalPractitioner":null,"id":"1127421b-66fd-85ff-b92e-827c9a280be2","identifier":[{"assigner":null,"extension":null,"id":null,"period":null,"system":"AmbulatoryEHR1","type":null,"use":null,"value":"1127421b-66fd-85ff-b92e-827c9a280be2"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"http://hospital.smarthealthit.org","type":{"coding":[{"code":"MR","display":"Medical Record Number","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Medical Record Number"},"use":null,"value":"1127421b-66fd-85ff-b92e-827c9a280be2"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"http://hl7.org/fhir/sid/us-ssn","type":{"coding":[{"code":"SS","display":"Social Security Number","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Social Security Number"},"use":null,"value":"999-43-1300"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"urn:oid:2.16.840.1.113883.4.3.25","type":{"coding":[{"code":"DL","display":"Driver's License","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Driver's 
License"},"use":null,"value":"S99915425"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"http://standardhealthrecord.org/fhir/StructureDefinition/passportNumber","type":{"coding":[{"code":"PPN","display":"Passport Number","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Passport Number"},"use":null,"value":"X77820784X"}],"implicitRules":null,"language":null,"link":null,"managingOrganization":null,"maritalStatus":{"coding":[{"code":"S","display":"S","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v3-MaritalStatus","userSelected":null,"version":null}],"extension":null,"id":null,"text":"S"},"meta":{"extension":null,"id":null,"lastUpdated":null,"profile":["http://hl7.org/fhir/us/core/StructureDefinition/us-core-patient"],"security":null,"source":null,"tag":null,"versionId":null},"modifierExtension":null,"multipleBirthBoolean":false,"multipleBirthInteger":null,"name":[{"extension":null,"family":"Quigley282","given":["Wilmer32"],"id":null,"period":null,"prefix":["Mr."],"suffix":null,"text":null,"use":"official"}],"photo":null,"resourceType":"Patient","telecom":[{"extension":null,"id":null,"period":null,"rank":null,"system":"phone","use":"home","value":"555-840-3615"}],"text":{"div":"
Generated by Synthea.Version identifier: 9f5ff46\n . Person seed: -223297253198561881 Population seed: 960411
","extension":null,"id":null,"status":"generated"},"yy__us_core_birthsex":null,"yy__us_core_ethnicity":null,"yy__us_core_race":null,"yy__version":null} +{"active":null,"address":[{"city":"Springfield","country":"US","district":null,"extension":["{\"url\":\"http://hl7.org/fhir/StructureDefinition/geolocation\",\"extension\":[{\"url\":\"latitude\",\"valueDecimal\":42.09871671034907},{\"url\":\"longitude\",\"valueDecimal\":-72.57154867948968}]}"],"id":null,"line":["906 Vandervort Crossroad Apt 64"],"period":null,"postalCode":"01104","state":"MA","text":null,"type":null,"use":null}],"birthDate":"1988-04-26","communication":[{"extension":null,"id":null,"language":{"coding":[{"code":"en-US","display":"English","extension":null,"id":null,"system":"urn:ietf:bcp:47","userSelected":null,"version":null}],"extension":null,"id":null,"text":"English"},"modifierExtension":null,"preferred":null}],"contact":null,"contained":null,"deceasedBoolean":null,"deceasedDateTime":null,"extension":["{\"url\":\"http://hl7.org/fhir/us/core/StructureDefinition/us-core-race\",\"extension\":[{\"url\":\"ombCategory\",\"valueCoding\":{\"system\":\"urn:oid:2.16.840.1.113883.6.238\",\"code\":\"2106-3\",\"display\":\"White\"}},{\"url\":\"text\",\"valueString\":\"White\"}]}","{\"url\":\"http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity\",\"extension\":[{\"url\":\"ombCategory\",\"valueCoding\":{\"system\":\"urn:oid:2.16.840.1.113883.6.238\",\"code\":\"2186-5\",\"display\":\"Not Hispanic or Latino\"}},{\"url\":\"text\",\"valueString\":\"Not Hispanic or Latino\"}]}","{\"url\":\"http://hl7.org/fhir/StructureDefinition/patient-mothersMaidenName\",\"valueString\":\"Quinn173 
Lebsack687\"}","{\"url\":\"http://hl7.org/fhir/us/core/StructureDefinition/us-core-birthsex\",\"valueCode\":\"F\"}","{\"url\":\"http://hl7.org/fhir/StructureDefinition/patient-birthPlace\",\"valueAddress\":{\"city\":\"Bedford\",\"state\":\"Massachusetts\",\"country\":\"US\"}}","{\"url\":\"http://synthetichealth.github.io/synthea/disability-adjusted-life-years\",\"valueDecimal\":0.045211340867391314}","{\"url\":\"http://synthetichealth.github.io/synthea/quality-adjusted-life-years\",\"valueDecimal\":31.95478865913261}"],"gender":"female","generalPractitioner":null,"id":"3bdac299-3731-0acd-9cd9-fbba40236e3a","identifier":[{"assigner":null,"extension":null,"id":null,"period":null,"system":"AmbulatoryEHR1","type":null,"use":null,"value":"3bdac299-3731-0acd-9cd9-fbba40236e3a"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"http://hospital.smarthealthit.org","type":{"coding":[{"code":"MR","display":"Medical Record Number","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Medical Record Number"},"use":null,"value":"3bdac299-3731-0acd-9cd9-fbba40236e3a"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"http://hl7.org/fhir/sid/us-ssn","type":{"coding":[{"code":"SS","display":"Social Security Number","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Social Security Number"},"use":null,"value":"999-31-6394"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"urn:oid:2.16.840.1.113883.4.3.25","type":{"coding":[{"code":"DL","display":"Driver's License","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Driver's 
License"},"use":null,"value":"S99998286"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"http://standardhealthrecord.org/fhir/StructureDefinition/passportNumber","type":{"coding":[{"code":"PPN","display":"Passport Number","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Passport Number"},"use":null,"value":"X85070564X"}],"implicitRules":null,"language":null,"link":null,"managingOrganization":null,"maritalStatus":{"coding":[{"code":"M","display":"M","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v3-MaritalStatus","userSelected":null,"version":null}],"extension":null,"id":null,"text":"M"},"meta":{"extension":null,"id":null,"lastUpdated":null,"profile":["http://hl7.org/fhir/us/core/StructureDefinition/us-core-patient"],"security":null,"source":null,"tag":null,"versionId":null},"modifierExtension":null,"multipleBirthBoolean":false,"multipleBirthInteger":null,"name":[{"extension":null,"family":"Collier206","given":["Towanda270"],"id":null,"period":null,"prefix":["Mrs."],"suffix":null,"text":null,"use":"official"},{"extension":null,"family":"Mayer370","given":["Towanda270"],"id":null,"period":null,"prefix":["Mrs."],"suffix":null,"text":null,"use":"maiden"}],"photo":null,"resourceType":"Patient","telecom":[{"extension":null,"id":null,"period":null,"rank":null,"system":"phone","use":"home","value":"555-973-5271"}],"text":{"div":"
Generated by Synthea.Version identifier: 9f5ff46\n . Person seed: -3514354516532618450 Population seed: 960411
","extension":null,"id":null,"status":"generated"},"yy__us_core_birthsex":null,"yy__us_core_ethnicity":null,"yy__us_core_race":null,"yy__version":null} +{"active":null,"address":[{"city":"Danvers","country":"US","district":null,"extension":["{\"url\":\"http://hl7.org/fhir/StructureDefinition/geolocation\",\"extension\":[{\"url\":\"latitude\",\"valueDecimal\":42.594891893685656},{\"url\":\"longitude\",\"valueDecimal\":-70.91669250267296}]}"],"id":null,"line":["141 Hane Club"],"period":null,"postalCode":null,"state":"MA","text":null,"type":null,"use":null}],"birthDate":"1994-09-30","communication":[{"extension":null,"id":null,"language":{"coding":[{"code":"en-US","display":"English","extension":null,"id":null,"system":"urn:ietf:bcp:47","userSelected":null,"version":null}],"extension":null,"id":null,"text":"English"},"modifierExtension":null,"preferred":null}],"contact":null,"contained":null,"deceasedBoolean":null,"deceasedDateTime":null,"extension":["{\"url\":\"http://hl7.org/fhir/us/core/StructureDefinition/us-core-race\",\"extension\":[{\"url\":\"ombCategory\",\"valueCoding\":{\"system\":\"urn:oid:2.16.840.1.113883.6.238\",\"code\":\"2106-3\",\"display\":\"White\"}},{\"url\":\"text\",\"valueString\":\"White\"}]}","{\"url\":\"http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity\",\"extension\":[{\"url\":\"ombCategory\",\"valueCoding\":{\"system\":\"urn:oid:2.16.840.1.113883.6.238\",\"code\":\"2186-5\",\"display\":\"Not Hispanic or Latino\"}},{\"url\":\"text\",\"valueString\":\"Not Hispanic or Latino\"}]}","{\"url\":\"http://hl7.org/fhir/StructureDefinition/patient-mothersMaidenName\",\"valueString\":\"Olga567 
Wunsch504\"}","{\"url\":\"http://hl7.org/fhir/us/core/StructureDefinition/us-core-birthsex\",\"valueCode\":\"F\"}","{\"url\":\"http://hl7.org/fhir/StructureDefinition/patient-birthPlace\",\"valueAddress\":{\"city\":\"Tyngsborough\",\"state\":\"Massachusetts\",\"country\":\"US\"}}","{\"url\":\"http://synthetichealth.github.io/synthea/disability-adjusted-life-years\",\"valueDecimal\":0.1435201139786352}","{\"url\":\"http://synthetichealth.github.io/synthea/quality-adjusted-life-years\",\"valueDecimal\":25.856479886021365}"],"gender":"female","generalPractitioner":null,"id":"47e6dfcb-30a9-2a66-c8fb-984da359ead1","identifier":[{"assigner":null,"extension":null,"id":null,"period":null,"system":"AmbulatoryEHR1","type":null,"use":null,"value":"47e6dfcb-30a9-2a66-c8fb-984da359ead1"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"http://hospital.smarthealthit.org","type":{"coding":[{"code":"MR","display":"Medical Record Number","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Medical Record Number"},"use":null,"value":"47e6dfcb-30a9-2a66-c8fb-984da359ead1"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"http://hl7.org/fhir/sid/us-ssn","type":{"coding":[{"code":"SS","display":"Social Security Number","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Social Security Number"},"use":null,"value":"999-19-2252"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"urn:oid:2.16.840.1.113883.4.3.25","type":{"coding":[{"code":"DL","display":"Driver's License","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Driver's 
License"},"use":null,"value":"S99944894"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"http://standardhealthrecord.org/fhir/StructureDefinition/passportNumber","type":{"coding":[{"code":"PPN","display":"Passport Number","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Passport Number"},"use":null,"value":"X71377926X"}],"implicitRules":null,"language":null,"link":null,"managingOrganization":null,"maritalStatus":{"coding":[{"code":"M","display":"M","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v3-MaritalStatus","userSelected":null,"version":null}],"extension":null,"id":null,"text":"M"},"meta":{"extension":null,"id":null,"lastUpdated":null,"profile":["http://hl7.org/fhir/us/core/StructureDefinition/us-core-patient"],"security":null,"source":null,"tag":null,"versionId":null},"modifierExtension":null,"multipleBirthBoolean":false,"multipleBirthInteger":null,"name":[{"extension":null,"family":"Powlowski563","given":["Bella510"],"id":null,"period":null,"prefix":["Mrs."],"suffix":null,"text":null,"use":"official"},{"extension":null,"family":"Prohaska837","given":["Bella510"],"id":null,"period":null,"prefix":["Mrs."],"suffix":null,"text":null,"use":"maiden"}],"photo":null,"resourceType":"Patient","telecom":[{"extension":null,"id":null,"period":null,"rank":null,"system":"phone","use":"home","value":"555-102-4193"}],"text":{"div":"
Generated by Synthea.Version identifier: 9f5ff46\n . Person seed: -1236940699136042677 Population seed: 960411
","extension":null,"id":null,"status":"generated"},"yy__us_core_birthsex":null,"yy__us_core_ethnicity":null,"yy__us_core_race":null,"yy__version":null} +{"active":null,"address":[{"city":"Eastham","country":"US","district":null,"extension":["{\"url\":\"http://hl7.org/fhir/StructureDefinition/geolocation\",\"extension\":[{\"url\":\"latitude\",\"valueDecimal\":41.84268235860006},{\"url\":\"longitude\",\"valueDecimal\":-69.97656155315549}]}"],"id":null,"line":["402 Rath Manor Suite 94"],"period":null,"postalCode":null,"state":"MA","text":null,"type":null,"use":null}],"birthDate":"1965-10-21","communication":[{"extension":null,"id":null,"language":{"coding":[{"code":"en-US","display":"English","extension":null,"id":null,"system":"urn:ietf:bcp:47","userSelected":null,"version":null}],"extension":null,"id":null,"text":"English"},"modifierExtension":null,"preferred":null}],"contact":null,"contained":null,"deceasedBoolean":null,"deceasedDateTime":null,"extension":["{\"url\":\"http://hl7.org/fhir/us/core/StructureDefinition/us-core-race\",\"extension\":[{\"url\":\"ombCategory\",\"valueCoding\":{\"system\":\"urn:oid:2.16.840.1.113883.6.238\",\"code\":\"2106-3\",\"display\":\"White\"}},{\"url\":\"text\",\"valueString\":\"White\"}]}","{\"url\":\"http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity\",\"extension\":[{\"url\":\"ombCategory\",\"valueCoding\":{\"system\":\"urn:oid:2.16.840.1.113883.6.238\",\"code\":\"2186-5\",\"display\":\"Not Hispanic or Latino\"}},{\"url\":\"text\",\"valueString\":\"Not Hispanic or Latino\"}]}","{\"url\":\"http://hl7.org/fhir/StructureDefinition/patient-mothersMaidenName\",\"valueString\":\"Cecile401 Heidenreich818\"}","{\"url\":\"http://hl7.org/fhir/us/core/StructureDefinition/us-core-birthsex\",\"valueCode\":\"F\"}","{\"url\":\"http://hl7.org/fhir/StructureDefinition/patient-birthPlace\",\"valueAddress\":{\"city\":\"New 
Bedford\",\"state\":\"Massachusetts\",\"country\":\"US\"}}","{\"url\":\"http://synthetichealth.github.io/synthea/disability-adjusted-life-years\",\"valueDecimal\":0.7725555461331691}","{\"url\":\"http://synthetichealth.github.io/synthea/quality-adjusted-life-years\",\"valueDecimal\":54.22744445386683}"],"gender":"female","generalPractitioner":null,"id":"8aec8d06-74af-aed5-0132-5b77fc6b418b","identifier":[{"assigner":null,"extension":null,"id":null,"period":null,"system":"AmbulatoryEHR1","type":null,"use":null,"value":"8aec8d06-74af-aed5-0132-5b77fc6b418b"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"http://hospital.smarthealthit.org","type":{"coding":[{"code":"MR","display":"Medical Record Number","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Medical Record Number"},"use":null,"value":"8aec8d06-74af-aed5-0132-5b77fc6b418b"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"http://hl7.org/fhir/sid/us-ssn","type":{"coding":[{"code":"SS","display":"Social Security Number","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Social Security Number"},"use":null,"value":"999-38-7110"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"urn:oid:2.16.840.1.113883.4.3.25","type":{"coding":[{"code":"DL","display":"Driver's License","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Driver's License"},"use":null,"value":"S99959565"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"http://standardhealthrecord.org/fhir/StructureDefinition/passportNumber","type":{"coding":[{"code":"PPN","display":"Passport 
Number","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Passport Number"},"use":null,"value":"X7055866X"}],"implicitRules":null,"language":null,"link":null,"managingOrganization":null,"maritalStatus":{"coding":[{"code":"S","display":"S","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v3-MaritalStatus","userSelected":null,"version":null}],"extension":null,"id":null,"text":"S"},"meta":{"extension":null,"id":null,"lastUpdated":null,"profile":["http://hl7.org/fhir/us/core/StructureDefinition/us-core-patient"],"security":null,"source":null,"tag":null,"versionId":null},"modifierExtension":null,"multipleBirthBoolean":false,"multipleBirthInteger":null,"name":[{"extension":null,"family":"Konopelski743","given":["Alysa236"],"id":null,"period":null,"prefix":["Ms."],"suffix":null,"text":null,"use":"official"}],"photo":null,"resourceType":"Patient","telecom":[{"extension":null,"id":null,"period":null,"rank":null,"system":"phone","use":"home","value":"555-526-4525"}],"text":{"div":"
Generated by Synthea.Version identifier: 9f5ff46\n . Person seed: -6858861957750234386 Population seed: 960411
","extension":null,"id":null,"status":"generated"},"yy__us_core_birthsex":null,"yy__us_core_ethnicity":null,"yy__us_core_race":null,"yy__version":null} +{"active":null,"address":[{"city":"Lowell","country":"US","district":null,"extension":["{\"url\":\"http://hl7.org/fhir/StructureDefinition/geolocation\",\"extension\":[{\"url\":\"latitude\",\"valueDecimal\":42.67217723881718},{\"url\":\"longitude\",\"valueDecimal\":-71.29043550601857}]}"],"id":null,"line":["148 Schneider Neck"],"period":null,"postalCode":"01851","state":"MA","text":null,"type":null,"use":null}],"birthDate":"1982-12-05","communication":[{"extension":null,"id":null,"language":{"coding":[{"code":"en-US","display":"English","extension":null,"id":null,"system":"urn:ietf:bcp:47","userSelected":null,"version":null}],"extension":null,"id":null,"text":"English"},"modifierExtension":null,"preferred":null}],"contact":null,"contained":null,"deceasedBoolean":null,"deceasedDateTime":null,"extension":["{\"url\":\"http://hl7.org/fhir/us/core/StructureDefinition/us-core-race\",\"extension\":[{\"url\":\"ombCategory\",\"valueCoding\":{\"system\":\"urn:oid:2.16.840.1.113883.6.238\",\"code\":\"2106-3\",\"display\":\"White\"}},{\"url\":\"text\",\"valueString\":\"White\"}]}","{\"url\":\"http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity\",\"extension\":[{\"url\":\"ombCategory\",\"valueCoding\":{\"system\":\"urn:oid:2.16.840.1.113883.6.238\",\"code\":\"2186-5\",\"display\":\"Not Hispanic or Latino\"}},{\"url\":\"text\",\"valueString\":\"Not Hispanic or Latino\"}]}","{\"url\":\"http://hl7.org/fhir/StructureDefinition/patient-mothersMaidenName\",\"valueString\":\"Dorothea248 
Raynor401\"}","{\"url\":\"http://hl7.org/fhir/us/core/StructureDefinition/us-core-birthsex\",\"valueCode\":\"M\"}","{\"url\":\"http://hl7.org/fhir/StructureDefinition/patient-birthPlace\",\"valueAddress\":{\"city\":\"Marlborough\",\"state\":\"Massachusetts\",\"country\":\"US\"}}","{\"url\":\"http://synthetichealth.github.io/synthea/disability-adjusted-life-years\",\"valueDecimal\":0.8268701269918577}","{\"url\":\"http://synthetichealth.github.io/synthea/quality-adjusted-life-years\",\"valueDecimal\":37.17312987300814}"],"gender":"male","generalPractitioner":null,"id":"03af4547-1d67-68ae-ebbc-cc5a9fc6c898","identifier":[{"assigner":null,"extension":null,"id":null,"period":null,"system":"AmbulatoryEHR1","type":null,"use":null,"value":"03af4547-1d67-68ae-ebbc-cc5a9fc6c898"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"http://hospital.smarthealthit.org","type":{"coding":[{"code":"MR","display":"Medical Record Number","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Medical Record Number"},"use":null,"value":"03af4547-1d67-68ae-ebbc-cc5a9fc6c898"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"http://hl7.org/fhir/sid/us-ssn","type":{"coding":[{"code":"SS","display":"Social Security Number","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Social Security Number"},"use":null,"value":"999-64-5943"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"urn:oid:2.16.840.1.113883.4.3.25","type":{"coding":[{"code":"DL","display":"Driver's License","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Driver's 
License"},"use":null,"value":"S99943522"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"http://standardhealthrecord.org/fhir/StructureDefinition/passportNumber","type":{"coding":[{"code":"PPN","display":"Passport Number","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Passport Number"},"use":null,"value":"X52742960X"}],"implicitRules":null,"language":null,"link":null,"managingOrganization":null,"maritalStatus":{"coding":[{"code":"M","display":"M","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v3-MaritalStatus","userSelected":null,"version":null}],"extension":null,"id":null,"text":"M"},"meta":{"extension":null,"id":null,"lastUpdated":null,"profile":["http://hl7.org/fhir/us/core/StructureDefinition/us-core-patient"],"security":null,"source":null,"tag":null,"versionId":null},"modifierExtension":null,"multipleBirthBoolean":false,"multipleBirthInteger":null,"name":[{"extension":null,"family":"Schuster709","given":["Carlton317"],"id":null,"period":null,"prefix":["Mr."],"suffix":["JD"],"text":null,"use":"official"}],"photo":null,"resourceType":"Patient","telecom":[{"extension":null,"id":null,"period":null,"rank":null,"system":"phone","use":"home","value":"555-174-1540"}],"text":{"div":"
Generated by Synthea.Version identifier: 9f5ff46\n . Person seed: 6256616874878650009 Population seed: 960411
","extension":null,"id":null,"status":"generated"},"yy__us_core_birthsex":null,"yy__us_core_ethnicity":null,"yy__us_core_race":null,"yy__version":null} +{"active":null,"address":[{"city":"Greenfield","country":"US","district":null,"extension":["{\"url\":\"http://hl7.org/fhir/StructureDefinition/geolocation\",\"extension\":[{\"url\":\"latitude\",\"valueDecimal\":42.610630412083424},{\"url\":\"longitude\",\"valueDecimal\":-72.59294763251627}]}"],"id":null,"line":["313 Flatley Neck Unit 4"],"period":null,"postalCode":null,"state":"MA","text":null,"type":null,"use":null}],"birthDate":"1963-10-26","communication":[{"extension":null,"id":null,"language":{"coding":[{"code":"en-US","display":"English","extension":null,"id":null,"system":"urn:ietf:bcp:47","userSelected":null,"version":null}],"extension":null,"id":null,"text":"English"},"modifierExtension":null,"preferred":null}],"contact":null,"contained":null,"deceasedBoolean":null,"deceasedDateTime":null,"extension":["{\"url\":\"http://hl7.org/fhir/us/core/StructureDefinition/us-core-race\",\"extension\":[{\"url\":\"ombCategory\",\"valueCoding\":{\"system\":\"urn:oid:2.16.840.1.113883.6.238\",\"code\":\"2106-3\",\"display\":\"White\"}},{\"url\":\"text\",\"valueString\":\"White\"}]}","{\"url\":\"http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity\",\"extension\":[{\"url\":\"ombCategory\",\"valueCoding\":{\"system\":\"urn:oid:2.16.840.1.113883.6.238\",\"code\":\"2186-5\",\"display\":\"Not Hispanic or Latino\"}},{\"url\":\"text\",\"valueString\":\"Not Hispanic or Latino\"}]}","{\"url\":\"http://hl7.org/fhir/StructureDefinition/patient-mothersMaidenName\",\"valueString\":\"Erika442 
Morar593\"}","{\"url\":\"http://hl7.org/fhir/us/core/StructureDefinition/us-core-birthsex\",\"valueCode\":\"F\"}","{\"url\":\"http://hl7.org/fhir/StructureDefinition/patient-birthPlace\",\"valueAddress\":{\"city\":\"Milford\",\"state\":\"Massachusetts\",\"country\":\"US\"}}","{\"url\":\"http://synthetichealth.github.io/synthea/disability-adjusted-life-years\",\"valueDecimal\":0.5801999125523113}","{\"url\":\"http://synthetichealth.github.io/synthea/quality-adjusted-life-years\",\"valueDecimal\":56.41980008744769}"],"gender":"female","generalPractitioner":null,"id":"180aefff-e7cc-940e-cb2c-3ed99c1aed39","identifier":[{"assigner":null,"extension":null,"id":null,"period":null,"system":"AmbulatoryEHR1","type":null,"use":null,"value":"180aefff-e7cc-940e-cb2c-3ed99c1aed39"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"http://hospital.smarthealthit.org","type":{"coding":[{"code":"MR","display":"Medical Record Number","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Medical Record Number"},"use":null,"value":"180aefff-e7cc-940e-cb2c-3ed99c1aed39"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"http://hl7.org/fhir/sid/us-ssn","type":{"coding":[{"code":"SS","display":"Social Security Number","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Social Security Number"},"use":null,"value":"999-17-6977"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"urn:oid:2.16.840.1.113883.4.3.25","type":{"coding":[{"code":"DL","display":"Driver's License","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Driver's 
License"},"use":null,"value":"S99917899"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"http://standardhealthrecord.org/fhir/StructureDefinition/passportNumber","type":{"coding":[{"code":"PPN","display":"Passport Number","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Passport Number"},"use":null,"value":"X34904922X"}],"implicitRules":null,"language":null,"link":null,"managingOrganization":null,"maritalStatus":{"coding":[{"code":"M","display":"M","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v3-MaritalStatus","userSelected":null,"version":null}],"extension":null,"id":null,"text":"M"},"meta":{"extension":null,"id":null,"lastUpdated":null,"profile":["http://hl7.org/fhir/us/core/StructureDefinition/us-core-patient"],"security":null,"source":null,"tag":null,"versionId":null},"modifierExtension":null,"multipleBirthBoolean":false,"multipleBirthInteger":null,"name":[{"extension":null,"family":"Runolfsson901","given":["Arlene209"],"id":null,"period":null,"prefix":["Mrs."],"suffix":null,"text":null,"use":"official"},{"extension":null,"family":"Dibbert990","given":["Arlene209"],"id":null,"period":null,"prefix":["Mrs."],"suffix":null,"text":null,"use":"maiden"}],"photo":null,"resourceType":"Patient","telecom":[{"extension":null,"id":null,"period":null,"rank":null,"system":"phone","use":"home","value":"555-288-9267"}],"text":{"div":"
Generated by Synthea.Version identifier: 9f5ff46\n . Person seed: -3898542022148173457 Population seed: 960411
","extension":null,"id":null,"status":"generated"},"yy__us_core_birthsex":null,"yy__us_core_ethnicity":null,"yy__us_core_race":null,"yy__version":null} +{"active":null,"address":[{"city":"Lowell","country":"US","district":null,"extension":["{\"url\":\"http://hl7.org/fhir/StructureDefinition/geolocation\",\"extension\":[{\"url\":\"latitude\",\"valueDecimal\":42.65303866038144},{\"url\":\"longitude\",\"valueDecimal\":-71.38787925845732}]}"],"id":null,"line":["275 Nienow Junction"],"period":null,"postalCode":null,"state":"MA","text":null,"type":null,"use":null}],"birthDate":"2006-01-01","communication":[{"extension":null,"id":null,"language":{"coding":[{"code":"en-US","display":"English","extension":null,"id":null,"system":"urn:ietf:bcp:47","userSelected":null,"version":null}],"extension":null,"id":null,"text":"English"},"modifierExtension":null,"preferred":null}],"contact":null,"contained":null,"deceasedBoolean":null,"deceasedDateTime":null,"extension":["{\"url\":\"http://hl7.org/fhir/us/core/StructureDefinition/us-core-race\",\"extension\":[{\"url\":\"ombCategory\",\"valueCoding\":{\"system\":\"urn:oid:2.16.840.1.113883.6.238\",\"code\":\"2106-3\",\"display\":\"White\"}},{\"url\":\"text\",\"valueString\":\"White\"}]}","{\"url\":\"http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity\",\"extension\":[{\"url\":\"ombCategory\",\"valueCoding\":{\"system\":\"urn:oid:2.16.840.1.113883.6.238\",\"code\":\"2186-5\",\"display\":\"Not Hispanic or Latino\"}},{\"url\":\"text\",\"valueString\":\"Not Hispanic or Latino\"}]}","{\"url\":\"http://hl7.org/fhir/StructureDefinition/patient-mothersMaidenName\",\"valueString\":\"Nakesha793 Bernhard322\"}","{\"url\":\"http://hl7.org/fhir/us/core/StructureDefinition/us-core-birthsex\",\"valueCode\":\"M\"}","{\"url\":\"http://hl7.org/fhir/StructureDefinition/patient-birthPlace\",\"valueAddress\":{\"city\":\"East 
Longmeadow\",\"state\":\"Massachusetts\",\"country\":\"US\"}}","{\"url\":\"http://synthetichealth.github.io/synthea/disability-adjusted-life-years\",\"valueDecimal\":0.001130283521684783}","{\"url\":\"http://synthetichealth.github.io/synthea/quality-adjusted-life-years\",\"valueDecimal\":14.998869716478316}"],"gender":"male","generalPractitioner":null,"id":"966070ad-f22e-631f-acf2-657c90f903f1","identifier":[{"assigner":null,"extension":null,"id":null,"period":null,"system":"AmbulatoryEHR1","type":null,"use":null,"value":"966070ad-f22e-631f-acf2-657c90f903f1"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"http://hospital.smarthealthit.org","type":{"coding":[{"code":"MR","display":"Medical Record Number","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Medical Record Number"},"use":null,"value":"966070ad-f22e-631f-acf2-657c90f903f1"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"http://hl7.org/fhir/sid/us-ssn","type":{"coding":[{"code":"SS","display":"Social Security Number","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Social Security Number"},"use":null,"value":"999-16-9875"}],"implicitRules":null,"language":null,"link":null,"managingOrganization":null,"maritalStatus":{"coding":[{"code":"S","display":"Never Married","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v3-MaritalStatus","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Never 
Married"},"meta":{"extension":null,"id":null,"lastUpdated":null,"profile":["http://hl7.org/fhir/us/core/StructureDefinition/us-core-patient"],"security":null,"source":null,"tag":null,"versionId":null},"modifierExtension":null,"multipleBirthBoolean":false,"multipleBirthInteger":null,"name":[{"extension":null,"family":"Welch179","given":["Adrian111"],"id":null,"period":null,"prefix":null,"suffix":null,"text":null,"use":"official"}],"photo":null,"resourceType":"Patient","telecom":[{"extension":null,"id":null,"period":null,"rank":null,"system":"phone","use":"home","value":"555-235-2038"}],"text":{"div":"
Generated by Synthea.Version identifier: 9f5ff46\n . Person seed: 4635699768341268942 Population seed: 960411
","extension":null,"id":null,"status":"generated"},"yy__us_core_birthsex":null,"yy__us_core_ethnicity":null,"yy__us_core_race":null,"yy__version":null} +{"active":null,"address":[{"city":"Belmont","country":"US","district":null,"extension":["{\"url\":\"http://hl7.org/fhir/StructureDefinition/geolocation\",\"extension\":[{\"url\":\"latitude\",\"valueDecimal\":42.43884801250347},{\"url\":\"longitude\",\"valueDecimal\":-71.14121105733896}]}"],"id":null,"line":["1047 Beier Park Unit 7"],"period":null,"postalCode":"02478","state":"MA","text":null,"type":null,"use":null}],"birthDate":"1961-10-27","communication":[{"extension":null,"id":null,"language":{"coding":[{"code":"en-US","display":"English","extension":null,"id":null,"system":"urn:ietf:bcp:47","userSelected":null,"version":null}],"extension":null,"id":null,"text":"English"},"modifierExtension":null,"preferred":null}],"contact":null,"contained":null,"deceasedBoolean":null,"deceasedDateTime":null,"extension":["{\"url\":\"http://hl7.org/fhir/us/core/StructureDefinition/us-core-race\",\"extension\":[{\"url\":\"ombCategory\",\"valueCoding\":{\"system\":\"urn:oid:2.16.840.1.113883.6.238\",\"code\":\"2106-3\",\"display\":\"White\"}},{\"url\":\"text\",\"valueString\":\"White\"}]}","{\"url\":\"http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity\",\"extension\":[{\"url\":\"ombCategory\",\"valueCoding\":{\"system\":\"urn:oid:2.16.840.1.113883.6.238\",\"code\":\"2186-5\",\"display\":\"Not Hispanic or Latino\"}},{\"url\":\"text\",\"valueString\":\"Not Hispanic or Latino\"}]}","{\"url\":\"http://hl7.org/fhir/StructureDefinition/patient-mothersMaidenName\",\"valueString\":\"Kara173 
Botsford977\"}","{\"url\":\"http://hl7.org/fhir/us/core/StructureDefinition/us-core-birthsex\",\"valueCode\":\"F\"}","{\"url\":\"http://hl7.org/fhir/StructureDefinition/patient-birthPlace\",\"valueAddress\":{\"city\":\"Methuen\",\"state\":\"Massachusetts\",\"country\":\"US\"}}","{\"url\":\"http://synthetichealth.github.io/synthea/disability-adjusted-life-years\",\"valueDecimal\":13.34103765187151}","{\"url\":\"http://synthetichealth.github.io/synthea/quality-adjusted-life-years\",\"valueDecimal\":45.65896234812849}"],"gender":"female","generalPractitioner":null,"id":"d94a1af1-bf9b-a705-bc54-2d0009c39981","identifier":[{"assigner":null,"extension":null,"id":null,"period":null,"system":"AmbulatoryEHR1","type":null,"use":null,"value":"d94a1af1-bf9b-a705-bc54-2d0009c39981"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"http://hospital.smarthealthit.org","type":{"coding":[{"code":"MR","display":"Medical Record Number","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Medical Record Number"},"use":null,"value":"d94a1af1-bf9b-a705-bc54-2d0009c39981"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"http://hl7.org/fhir/sid/us-ssn","type":{"coding":[{"code":"SS","display":"Social Security Number","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Social Security Number"},"use":null,"value":"999-12-5825"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"urn:oid:2.16.840.1.113883.4.3.25","type":{"coding":[{"code":"DL","display":"Driver's License","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Driver's 
License"},"use":null,"value":"S99943233"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"http://standardhealthrecord.org/fhir/StructureDefinition/passportNumber","type":{"coding":[{"code":"PPN","display":"Passport Number","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Passport Number"},"use":null,"value":"X40077261X"}],"implicitRules":null,"language":null,"link":null,"managingOrganization":null,"maritalStatus":{"coding":[{"code":"M","display":"M","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v3-MaritalStatus","userSelected":null,"version":null}],"extension":null,"id":null,"text":"M"},"meta":{"extension":null,"id":null,"lastUpdated":null,"profile":["http://hl7.org/fhir/us/core/StructureDefinition/us-core-patient"],"security":null,"source":null,"tag":null,"versionId":null},"modifierExtension":null,"multipleBirthBoolean":false,"multipleBirthInteger":null,"name":[{"extension":null,"family":"Buckridge80","given":["Shane235"],"id":null,"period":null,"prefix":["Mrs."],"suffix":null,"text":null,"use":"official"},{"extension":null,"family":"Leannon79","given":["Shane235"],"id":null,"period":null,"prefix":["Mrs."],"suffix":null,"text":null,"use":"maiden"}],"photo":null,"resourceType":"Patient","telecom":[{"extension":null,"id":null,"period":null,"rank":null,"system":"phone","use":"home","value":"555-575-1128"}],"text":{"div":"
Generated by Synthea.Version identifier: 9f5ff46\n . Person seed: 3390461079555467149 Population seed: 960411
","extension":null,"id":null,"status":"generated"},"yy__us_core_birthsex":null,"yy__us_core_ethnicity":null,"yy__us_core_race":null,"yy__version":null} +{"active":null,"address":[{"city":"Fairhaven","country":"US","district":null,"extension":["{\"url\":\"http://hl7.org/fhir/StructureDefinition/geolocation\",\"extension\":[{\"url\":\"latitude\",\"valueDecimal\":41.66838505766839},{\"url\":\"longitude\",\"valueDecimal\":-70.8618566603759}]}"],"id":null,"line":["588 Berge Quay Suite 84"],"period":null,"postalCode":null,"state":"MA","text":null,"type":null,"use":null}],"birthDate":"2014-01-22","communication":[{"extension":null,"id":null,"language":{"coding":[{"code":"en-US","display":"English","extension":null,"id":null,"system":"urn:ietf:bcp:47","userSelected":null,"version":null}],"extension":null,"id":null,"text":"English"},"modifierExtension":null,"preferred":null}],"contact":null,"contained":null,"deceasedBoolean":null,"deceasedDateTime":null,"extension":["{\"url\":\"http://hl7.org/fhir/us/core/StructureDefinition/us-core-race\",\"extension\":[{\"url\":\"ombCategory\",\"valueCoding\":{\"system\":\"urn:oid:2.16.840.1.113883.6.238\",\"code\":\"2106-3\",\"display\":\"White\"}},{\"url\":\"text\",\"valueString\":\"White\"}]}","{\"url\":\"http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity\",\"extension\":[{\"url\":\"ombCategory\",\"valueCoding\":{\"system\":\"urn:oid:2.16.840.1.113883.6.238\",\"code\":\"2186-5\",\"display\":\"Not Hispanic or Latino\"}},{\"url\":\"text\",\"valueString\":\"Not Hispanic or Latino\"}]}","{\"url\":\"http://hl7.org/fhir/StructureDefinition/patient-mothersMaidenName\",\"valueString\":\"Ana972 
Kiehn525\"}","{\"url\":\"http://hl7.org/fhir/us/core/StructureDefinition/us-core-birthsex\",\"valueCode\":\"F\"}","{\"url\":\"http://hl7.org/fhir/StructureDefinition/patient-birthPlace\",\"valueAddress\":{\"city\":\"Amherst\",\"state\":\"Massachusetts\",\"country\":\"US\"}}","{\"url\":\"http://synthetichealth.github.io/synthea/disability-adjusted-life-years\",\"valueDecimal\":0.006828050454379106}","{\"url\":\"http://synthetichealth.github.io/synthea/quality-adjusted-life-years\",\"valueDecimal\":5.993171949545621}"],"gender":"female","generalPractitioner":null,"id":"7f217143-f36c-707a-b77c-f4a1cf70f952","identifier":[{"assigner":null,"extension":null,"id":null,"period":null,"system":"AmbulatoryEHR1","type":null,"use":null,"value":"7f217143-f36c-707a-b77c-f4a1cf70f952"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"http://hospital.smarthealthit.org","type":{"coding":[{"code":"MR","display":"Medical Record Number","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Medical Record Number"},"use":null,"value":"7f217143-f36c-707a-b77c-f4a1cf70f952"},{"assigner":null,"extension":null,"id":null,"period":null,"system":"http://hl7.org/fhir/sid/us-ssn","type":{"coding":[{"code":"SS","display":"Social Security Number","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v2-0203","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Social Security Number"},"use":null,"value":"999-69-3793"}],"implicitRules":null,"language":null,"link":null,"managingOrganization":null,"maritalStatus":{"coding":[{"code":"S","display":"Never Married","extension":null,"id":null,"system":"http://terminology.hl7.org/CodeSystem/v3-MaritalStatus","userSelected":null,"version":null}],"extension":null,"id":null,"text":"Never 
Married"},"meta":{"extension":null,"id":null,"lastUpdated":null,"profile":["http://hl7.org/fhir/us/core/StructureDefinition/us-core-patient"],"security":null,"source":null,"tag":null,"versionId":null},"modifierExtension":null,"multipleBirthBoolean":false,"multipleBirthInteger":null,"name":[{"extension":null,"family":"Volkman526","given":["Luana867"],"id":null,"period":null,"prefix":null,"suffix":null,"text":null,"use":"official"}],"photo":null,"resourceType":"Patient","telecom":[{"extension":null,"id":null,"period":null,"rank":null,"system":"phone","use":"home","value":"555-207-7879"}],"text":{"div":"
Generated by Synthea.Version identifier: 9f5ff46\n . Person seed: 165098197321413909 Population seed: 960411
","extension":null,"id":null,"status":"generated"},"yy__us_core_birthsex":null,"yy__us_core_ethnicity":null,"yy__us_core_race":null,"yy__version":null} + +query +select /*+set_var(parquet_fast_read_bytes=1 )*/ $1 from @data/parquet/tuple.parquet; +---- +{"id":1,"t":{"A":1,"B":"a"}} +{"id":2,"t":{"A":3,"B":"b"}} +{"id":3,"t":{"A":3,"B":"c"}} + +query +select $1:address.0.city::string , $1:birthDate::date as dt from @data/parquet/complex.parquet order by dt; +---- +Belmont 1961-10-27 +Greenfield 1963-10-26 +Verona 1964-02-05 +Eastham 1965-10-21 +Attleboro 1976-12-16 +Lowell 1982-12-05 +Springfield 1988-04-26 +Danvers 1994-09-30 +Lowell 2006-01-01 +Fairhaven 2014-01-22 + +statement ok +create or replace table t3(a string, b date); + +query +copy into t3 from (select $1:address.0.city, $1:birthDate from @data/parquet/complex.parquet) +---- +parquet/complex.parquet 10 0 NULL NULL + +query +select * from t3 order by b +---- +Belmont 1961-10-27 +Greenfield 1963-10-26 +Verona 1964-02-05 +Eastham 1965-10-21 +Attleboro 1976-12-16 +Lowell 1982-12-05 +Springfield 1988-04-26 +Danvers 1994-09-30 +Lowell 2006-01-01 +Fairhaven 2014-01-22 diff --git a/tests/sqllogictests/suites/stage/ordered_unload.test b/tests/sqllogictests/suites/stage/ordered_unload.test index ac5a3438d2439..22ee5cac697b2 100644 --- a/tests/sqllogictests/suites/stage/ordered_unload.test +++ b/tests/sqllogictests/suites/stage/ordered_unload.test @@ -38,7 +38,7 @@ statement ok copy /*+ set_var(max_block_size=10) */ INTO @s1 from (select * from numbers(10000) order by number) file_format=(type=parquet) max_file_size=100; query -SELECT COUNT(*) FROM (SELECT $1 AS a, rank() OVER (ORDER BY metadata$filename, metadata$file_row_number, $1) AS rank FROM '@s1' ( FILE_FORMAT => 'parquet', )) WHERE a + 1 = rank +SELECT COUNT(*) FROM (SELECT number AS a, rank() OVER (ORDER BY metadata$filename, metadata$file_row_number, number) AS rank FROM '@s1' ( FILE_FORMAT => 'parquet', )) WHERE a + 1 = rank ---- 10000 diff --git 
a/tests/sqllogictests/suites/stage/select_star.test b/tests/sqllogictests/suites/stage/select_star.test index e5a9ba96d17ac..bec545a8062e6 100644 --- a/tests/sqllogictests/suites/stage/select_star.test +++ b/tests/sqllogictests/suites/stage/select_star.test @@ -34,11 +34,8 @@ remove @unload statement ok copy into @unload from a file_format = (type = 'parquet') -query +query error 1065 select *, $1 from @unload (file_format => 'parquet') ----- -1 1 -2 2 query select * from @unload (file_format => 'parquet')