From 1bce4c09bf3d843ba3ae87ad04bc417e6ca4ba19 Mon Sep 17 00:00:00 2001 From: Abhi Agarwal Date: Mon, 13 Oct 2025 07:30:48 -0400 Subject: [PATCH 1/5] Implement smoke 'benchmark' Signed-off-by: Abhi Agarwal --- crates/benchmarks/benches/smoke.rs | 35 ++++++ crates/benchmarks/src/lib.rs | 187 +---------------------------- crates/benchmarks/src/main.rs | 126 +++++++++++++------ crates/benchmarks/src/merge.rs | 186 ++++++++++++++++++++++++++++ crates/benchmarks/src/smoke.rs | 87 ++++++++++++++ 5 files changed, 402 insertions(+), 219 deletions(-) create mode 100644 crates/benchmarks/benches/smoke.rs create mode 100644 crates/benchmarks/src/merge.rs create mode 100644 crates/benchmarks/src/smoke.rs diff --git a/crates/benchmarks/benches/smoke.rs b/crates/benchmarks/benches/smoke.rs new file mode 100644 index 0000000000..92ee7b41fc --- /dev/null +++ b/crates/benchmarks/benches/smoke.rs @@ -0,0 +1,35 @@ +use delta_benchmarks::{run_smoke_once, SmokeParams}; +use divan::{AllocProfiler, Bencher}; +use url::Url; + +fn main() { + divan::main(); +} + +#[global_allocator] +static ALLOC: AllocProfiler = AllocProfiler::system(); + +type Runtime = tokio::runtime::Runtime; + +fn bench_smoke(bencher: Bencher, params: &SmokeParams) { + let rt = Runtime::new().expect("create tokio runtime"); + bencher + .with_inputs(|| tempfile::tempdir().expect("create temp dir")) + .bench_local_values(|tmp_dir| { + let table_url = Url::from_directory_path(tmp_dir.path()).expect("tmp dir url"); + rt.block_on(async { + run_smoke_once(&table_url, params).await.expect("smoke run"); + }); + drop(tmp_dir); + }); +} + +#[divan::bench(args = [ + SmokeParams { rows: 2 }, + SmokeParams { rows: 10 }, + SmokeParams { rows: 100 }, + SmokeParams { rows: 1_000 }, +])] +fn smoke(bencher: Bencher, params: &SmokeParams) { + bench_smoke(bencher, params); +} diff --git a/crates/benchmarks/src/lib.rs b/crates/benchmarks/src/lib.rs index f8533e4cd2..e0cad5b356 100644 --- a/crates/benchmarks/src/lib.rs +++ 
b/crates/benchmarks/src/lib.rs @@ -1,184 +1,7 @@ -use std::path::Path; +pub mod merge; +pub mod smoke; -use deltalake_core::datafusion::functions::expr_fn; -use deltalake_core::kernel::engine::arrow_conversion::TryIntoKernel; -use deltalake_core::kernel::{StructField, StructType}; -use deltalake_core::operations::merge::MergeBuilder; -use deltalake_core::{arrow, DeltaResult}; -use deltalake_core::{ - datafusion::{ - logical_expr::{cast, lit}, - prelude::{DataFrame, ParquetReadOptions, SessionContext}, - }, - DeltaOps, DeltaTable, DeltaTableError, +pub use merge::{ + merge_delete, merge_insert, merge_upsert, prepare_source_and_table, MergeOp, MergePerfParams, }; -use tempfile::TempDir; -use url::Url; - -pub type MergeOp = fn(DataFrame, DeltaTable) -> Result; - -#[derive(Debug, Clone)] -pub struct MergePerfParams { - pub sample_matched_rows: f32, - pub sample_not_matched_rows: f32, -} - -pub fn merge_upsert(source: DataFrame, table: DeltaTable) -> Result { - deltalake_core::DeltaOps(table) - .merge(source, "source.wr_item_sk = target.wr_item_sk and source.wr_order_number = target.wr_order_number") - .with_source_alias("source") - .with_target_alias("target") - .when_matched_update(|update| { - update - .update("wr_returned_date_sk", "source.wr_returned_date_sk") - .update("wr_returned_time_sk", "source.wr_returned_time_sk") - .update("wr_item_sk", "source.wr_item_sk") - .update("wr_refunded_customer_sk", "source.wr_refunded_customer_sk") - .update("wr_refunded_cdemo_sk", "source.wr_refunded_cdemo_sk") - .update("wr_refunded_hdemo_sk", "source.wr_refunded_hdemo_sk") - .update("wr_refunded_addr_sk", "source.wr_refunded_addr_sk") - .update("wr_returning_customer_sk", "source.wr_returning_customer_sk") - .update("wr_returning_cdemo_sk", "source.wr_returning_cdemo_sk") - .update("wr_returning_hdemo_sk", "source.wr_returning_hdemo_sk") - .update("wr_returning_addr_sk", "source.wr_returning_addr_sk") - .update("wr_web_page_sk", "source.wr_web_page_sk") - 
.update("wr_reason_sk", "source.wr_reason_sk") - .update("wr_order_number", "source.wr_order_number") - .update("wr_return_quantity", "source.wr_return_quantity") - .update("wr_return_amt", "source.wr_return_amt") - .update("wr_return_tax", "source.wr_return_tax") - .update("wr_return_amt_inc_tax", "source.wr_return_amt_inc_tax") - .update("wr_fee", "source.wr_fee") - .update("wr_return_ship_cost", "source.wr_return_ship_cost") - .update("wr_refunded_cash", "source.wr_refunded_cash") - .update("wr_reversed_charge", "source.wr_reversed_charge") - .update("wr_account_credit", "source.wr_account_credit") - .update("wr_net_loss", "source.wr_net_loss") - })? - .when_not_matched_insert(|insert| { - insert - .set("wr_returned_date_sk", "source.wr_returned_date_sk") - .set("wr_returned_time_sk", "source.wr_returned_time_sk") - .set("wr_item_sk", "source.wr_item_sk") - .set("wr_refunded_customer_sk", "source.wr_refunded_customer_sk") - .set("wr_refunded_cdemo_sk", "source.wr_refunded_cdemo_sk") - .set("wr_refunded_hdemo_sk", "source.wr_refunded_hdemo_sk") - .set("wr_refunded_addr_sk", "source.wr_refunded_addr_sk") - .set("wr_returning_customer_sk", "source.wr_returning_customer_sk") - .set("wr_returning_cdemo_sk", "source.wr_returning_cdemo_sk") - .set("wr_returning_hdemo_sk", "source.wr_returning_hdemo_sk") - .set("wr_returning_addr_sk", "source.wr_returning_addr_sk") - .set("wr_web_page_sk", "source.wr_web_page_sk") - .set("wr_reason_sk", "source.wr_reason_sk") - .set("wr_order_number", "source.wr_order_number") - .set("wr_return_quantity", "source.wr_return_quantity") - .set("wr_return_amt", "source.wr_return_amt") - .set("wr_return_tax", "source.wr_return_tax") - .set("wr_return_amt_inc_tax", "source.wr_return_amt_inc_tax") - .set("wr_fee", "source.wr_fee") - .set("wr_return_ship_cost", "source.wr_return_ship_cost") - .set("wr_refunded_cash", "source.wr_refunded_cash") - .set("wr_reversed_charge", "source.wr_reversed_charge") - .set("wr_account_credit", 
"source.wr_account_credit") - .set("wr_net_loss", "source.wr_net_loss") - }) -} - -pub fn merge_insert(source: DataFrame, table: DeltaTable) -> Result { - deltalake_core::DeltaOps(table) - .merge(source, "source.wr_item_sk = target.wr_item_sk and source.wr_order_number = target.wr_order_number") - .with_source_alias("source") - .with_target_alias("target") - .when_not_matched_insert(|insert| { - insert - .set("wr_returned_date_sk", "source.wr_returned_date_sk") - .set("wr_returned_time_sk", "source.wr_returned_time_sk") - .set("wr_item_sk", "source.wr_item_sk") - .set("wr_refunded_customer_sk", "source.wr_refunded_customer_sk") - .set("wr_refunded_cdemo_sk", "source.wr_refunded_cdemo_sk") - .set("wr_refunded_hdemo_sk", "source.wr_refunded_hdemo_sk") - .set("wr_refunded_addr_sk", "source.wr_refunded_addr_sk") - .set("wr_returning_customer_sk", "source.wr_returning_customer_sk") - .set("wr_returning_cdemo_sk", "source.wr_returning_cdemo_sk") - .set("wr_returning_hdemo_sk", "source.wr_returning_hdemo_sk") - .set("wr_returning_addr_sk", "source.wr_returning_addr_sk") - .set("wr_web_page_sk", "source.wr_web_page_sk") - .set("wr_reason_sk", "source.wr_reason_sk") - .set("wr_order_number", "source.wr_order_number") - .set("wr_return_quantity", "source.wr_return_quantity") - .set("wr_return_amt", "source.wr_return_amt") - .set("wr_return_tax", "source.wr_return_tax") - .set("wr_return_amt_inc_tax", "source.wr_return_amt_inc_tax") - .set("wr_fee", "source.wr_fee") - .set("wr_return_ship_cost", "source.wr_return_ship_cost") - .set("wr_refunded_cash", "source.wr_refunded_cash") - .set("wr_reversed_charge", "source.wr_reversed_charge") - .set("wr_account_credit", "source.wr_account_credit") - .set("wr_net_loss", "source.wr_net_loss") - }) -} - -pub fn merge_delete(source: DataFrame, table: DeltaTable) -> Result { - deltalake_core::DeltaOps(table) - .merge(source, "source.wr_item_sk = target.wr_item_sk and source.wr_order_number = target.wr_order_number") - 
.with_source_alias("source") - .with_target_alias("target") - .when_matched_delete(|delete| delete) -} - -/// Prepare source DataFrame and target Delta table from DuckDB-generated TPC-DS parquet. -/// Creates a temporary Delta table from web_returns.parquet as the target. -/// Returns (source_df, target_table) for benchmarking. -pub async fn prepare_source_and_table( - params: &MergePerfParams, - tmp_dir: &TempDir, - parquet_dir: &Path, -) -> DeltaResult<(DataFrame, DeltaTable)> { - let ctx = SessionContext::new(); - - let parquet_path = parquet_dir - .join("web_returns.parquet") - .to_str() - .unwrap() - .to_owned(); - - let parquet_df = ctx - .read_parquet(&parquet_path, ParquetReadOptions::default()) - .await?; - let temp_table_url = Url::from_directory_path(tmp_dir).unwrap(); - - let schema = parquet_df.schema(); - let delta_schema: StructType = schema.as_arrow().try_into_kernel().unwrap(); - - let batches = parquet_df.collect().await?; - let fields: Vec = delta_schema.fields().cloned().collect(); - let table = DeltaOps::try_from_uri(temp_table_url) - .await? - .create() - .with_columns(fields) - .await?; - - let table = DeltaOps(table).write(batches).await?; - - // Now prepare source DataFrame with sampling - let source = ctx - .read_parquet(&parquet_path, ParquetReadOptions::default()) - .await?; - - // Split matched and not-matched portions - let matched = source - .clone() - .filter(expr_fn::random().lt_eq(lit(params.sample_matched_rows)))?; - - let rand = cast( - expr_fn::random() * lit(u32::MAX), - arrow::datatypes::DataType::Int64, - ); - let not_matched = source - .filter(expr_fn::random().lt_eq(lit(params.sample_not_matched_rows)))? - .with_column("wr_item_sk", rand.clone())? 
- .with_column("wr_order_number", rand)?; - - let source = matched.union(not_matched)?; - Ok((source, table)) -} +pub use smoke::{run_smoke_once, SmokeParams}; diff --git a/crates/benchmarks/src/main.rs b/crates/benchmarks/src/main.rs index fa7112d8b2..d1accd04e3 100644 --- a/crates/benchmarks/src/main.rs +++ b/crates/benchmarks/src/main.rs @@ -1,10 +1,12 @@ use std::{path::PathBuf, time::Instant}; -use clap::{Parser, ValueEnum}; +use clap::{Parser, Subcommand, ValueEnum}; use delta_benchmarks::{ - merge_delete, merge_insert, merge_upsert, prepare_source_and_table, MergeOp, MergePerfParams, + merge_delete, merge_insert, merge_upsert, prepare_source_and_table, run_smoke_once, MergeOp, + MergePerfParams, SmokeParams, }; +use deltalake_core::ensure_table_uri; #[derive(Copy, Clone, Debug, ValueEnum)] enum OpKind { @@ -14,53 +16,103 @@ enum OpKind { } #[derive(Parser, Debug)] -#[command(about = "Run a merge benchmark with configurable parameters")] +#[command(about = "Run delta-rs benchmarks")] struct Cli { - /// Operation to benchmark - #[arg(value_enum)] - op: OpKind, + #[command(subcommand)] + command: Command, +} + +#[derive(Debug, Subcommand)] +enum Command { + /// Run a merge benchmark with configurable parameters + Merge { + /// Operation to benchmark + #[arg(value_enum)] + op: OpKind, + + /// Fraction of rows that match an existing key (0.0-1.0) + #[arg(long, default_value_t = 0.01)] + matched: f32, + + /// Fraction of rows that do not match (0.0-1.0) + #[arg(long, default_value_t = 0.10)] + not_matched: f32, + }, - /// Fraction of rows that match an existing key (0.0-1.0) - #[arg(long, default_value_t = 0.01)] - matched: f32, + /// Run the smoke workload to validate delta-rs read/write operations + Smoke { + /// Number of rows to write into the smoke table + #[arg(long, default_value_t = 2)] + rows: usize, - /// Fraction of rows that do not match (0.0-1.0) - #[arg(long, default_value_t = 0.10)] - not_matched: f32, + /// Optional table path to reuse for the 
smoke run (defaults to a temporary directory) + #[arg(long)] + table_path: Option, + }, } #[tokio::main] -async fn main() { +async fn main() -> anyhow::Result<()> { let cli = Cli::parse(); - let op_fn: MergeOp = match cli.op { - OpKind::Upsert => merge_upsert, - OpKind::Delete => merge_delete, - OpKind::Insert => merge_insert, - }; + match cli.command { + Command::Merge { + op, + matched, + not_matched, + } => { + let op_fn: MergeOp = match op { + OpKind::Upsert => merge_upsert, + OpKind::Delete => merge_delete, + OpKind::Insert => merge_insert, + }; + + let params = MergePerfParams { + sample_matched_rows: matched, + sample_not_matched_rows: not_matched, + }; + + let tmp_dir = tempfile::tempdir()?; + + let parquet_dir = PathBuf::from( + std::env::var("TPCDS_PARQUET_DIR") + .unwrap_or_else(|_| "crates/benchmarks/data/tpcds_parquet".to_string()), + ); - let params = MergePerfParams { - sample_matched_rows: cli.matched, - sample_not_matched_rows: cli.not_matched, - }; + let (source, table) = prepare_source_and_table(¶ms, &tmp_dir, &parquet_dir).await?; - let tmp_dir = tempfile::tempdir().expect("create tmp dir"); + let start = Instant::now(); + let (_table, metrics) = op_fn(source, table)?.await?; + let elapsed = start.elapsed(); - let parquet_dir = PathBuf::from( - std::env::var("TPCDS_PARQUET_DIR") - .unwrap_or_else(|_| "crates/benchmarks/data/tpcds_parquet".to_string()), - ); + println!( + "merge_duration_ms={} metrics={:?}", + elapsed.as_millis(), + metrics + ); + } + Command::Smoke { rows, table_path } => { + let params = SmokeParams { rows }; + let (table_url, _guard) = match table_path { + Some(path) => (ensure_table_uri(path.to_string_lossy().as_ref())?, None), + None => { + let dir = tempfile::tempdir()?; + let url = ensure_table_uri(dir.path().to_string_lossy().as_ref())?; + (url, Some(dir)) + } + }; - let (source, table) = prepare_source_and_table(¶ms, &tmp_dir, &parquet_dir) - .await - .expect("prepare inputs"); + let start = Instant::now(); + 
run_smoke_once(&table_url, ¶ms).await?; + let elapsed = start.elapsed(); - let start = Instant::now(); - let (_table, metrics) = op_fn(source, table) - .expect("build merge") - .await - .expect("execute merge"); - let elapsed = start.elapsed(); + println!( + "smoke_duration_ms={} table_uri={}", + elapsed.as_millis(), + table_url + ); + } + } - println!("duration_ms={} metrics={:?}", elapsed.as_millis(), metrics) + Ok(()) } diff --git a/crates/benchmarks/src/merge.rs b/crates/benchmarks/src/merge.rs new file mode 100644 index 0000000000..430ede0f62 --- /dev/null +++ b/crates/benchmarks/src/merge.rs @@ -0,0 +1,186 @@ +use std::path::Path; + +use deltalake_core::datafusion::functions::expr_fn; +use deltalake_core::datafusion::{ + logical_expr::{cast, lit}, + prelude::{DataFrame, ParquetReadOptions, SessionContext}, +}; +use deltalake_core::kernel::engine::arrow_conversion::TryIntoKernel; +use deltalake_core::kernel::{StructField, StructType}; +use deltalake_core::operations::merge::MergeBuilder; +use deltalake_core::DeltaTableError; +use deltalake_core::{arrow, DeltaOps, DeltaResult, DeltaTable}; +use tempfile::TempDir; +use url::Url; + +pub type MergeOp = fn(DataFrame, DeltaTable) -> Result; + +#[derive(Debug, Clone)] +pub struct MergePerfParams { + pub sample_matched_rows: f32, + pub sample_not_matched_rows: f32, +} + +pub fn merge_upsert(source: DataFrame, table: DeltaTable) -> Result { + DeltaOps(table) + .merge( + source, + "source.wr_item_sk = target.wr_item_sk and source.wr_order_number = target.wr_order_number", + ) + .with_source_alias("source") + .with_target_alias("target") + .when_matched_update(|update| { + update + .update("wr_returned_date_sk", "source.wr_returned_date_sk") + .update("wr_returned_time_sk", "source.wr_returned_time_sk") + .update("wr_item_sk", "source.wr_item_sk") + .update("wr_refunded_customer_sk", "source.wr_refunded_customer_sk") + .update("wr_refunded_cdemo_sk", "source.wr_refunded_cdemo_sk") + .update("wr_refunded_hdemo_sk", 
"source.wr_refunded_hdemo_sk") + .update("wr_refunded_addr_sk", "source.wr_refunded_addr_sk") + .update("wr_returning_customer_sk", "source.wr_returning_customer_sk") + .update("wr_returning_cdemo_sk", "source.wr_returning_cdemo_sk") + .update("wr_returning_hdemo_sk", "source.wr_returning_hdemo_sk") + .update("wr_returning_addr_sk", "source.wr_returning_addr_sk") + .update("wr_web_page_sk", "source.wr_web_page_sk") + .update("wr_reason_sk", "source.wr_reason_sk") + .update("wr_order_number", "source.wr_order_number") + .update("wr_return_quantity", "source.wr_return_quantity") + .update("wr_return_amt", "source.wr_return_amt") + .update("wr_return_tax", "source.wr_return_tax") + .update("wr_return_amt_inc_tax", "source.wr_return_amt_inc_tax") + .update("wr_fee", "source.wr_fee") + .update("wr_return_ship_cost", "source.wr_return_ship_cost") + .update("wr_refunded_cash", "source.wr_refunded_cash") + .update("wr_reversed_charge", "source.wr_reversed_charge") + .update("wr_account_credit", "source.wr_account_credit") + .update("wr_net_loss", "source.wr_net_loss") + })? 
+ .when_not_matched_insert(|insert| { + insert + .set("wr_returned_date_sk", "source.wr_returned_date_sk") + .set("wr_returned_time_sk", "source.wr_returned_time_sk") + .set("wr_item_sk", "source.wr_item_sk") + .set("wr_refunded_customer_sk", "source.wr_refunded_customer_sk") + .set("wr_refunded_cdemo_sk", "source.wr_refunded_cdemo_sk") + .set("wr_refunded_hdemo_sk", "source.wr_refunded_hdemo_sk") + .set("wr_refunded_addr_sk", "source.wr_refunded_addr_sk") + .set("wr_returning_customer_sk", "source.wr_returning_customer_sk") + .set("wr_returning_cdemo_sk", "source.wr_returning_cdemo_sk") + .set("wr_returning_hdemo_sk", "source.wr_returning_hdemo_sk") + .set("wr_returning_addr_sk", "source.wr_returning_addr_sk") + .set("wr_web_page_sk", "source.wr_web_page_sk") + .set("wr_reason_sk", "source.wr_reason_sk") + .set("wr_order_number", "source.wr_order_number") + .set("wr_return_quantity", "source.wr_return_quantity") + .set("wr_return_amt", "source.wr_return_amt") + .set("wr_return_tax", "source.wr_return_tax") + .set("wr_return_amt_inc_tax", "source.wr_return_amt_inc_tax") + .set("wr_fee", "source.wr_fee") + .set("wr_return_ship_cost", "source.wr_return_ship_cost") + .set("wr_refunded_cash", "source.wr_refunded_cash") + .set("wr_reversed_charge", "source.wr_reversed_charge") + .set("wr_account_credit", "source.wr_account_credit") + .set("wr_net_loss", "source.wr_net_loss") + }) +} + +pub fn merge_insert(source: DataFrame, table: DeltaTable) -> Result { + DeltaOps(table) + .merge( + source, + "source.wr_item_sk = target.wr_item_sk and source.wr_order_number = target.wr_order_number", + ) + .with_source_alias("source") + .with_target_alias("target") + .when_not_matched_insert(|insert| { + insert + .set("wr_returned_date_sk", "source.wr_returned_date_sk") + .set("wr_returned_time_sk", "source.wr_returned_time_sk") + .set("wr_item_sk", "source.wr_item_sk") + .set("wr_refunded_customer_sk", "source.wr_refunded_customer_sk") + .set("wr_refunded_cdemo_sk", 
"source.wr_refunded_cdemo_sk") + .set("wr_refunded_hdemo_sk", "source.wr_refunded_hdemo_sk") + .set("wr_refunded_addr_sk", "source.wr_refunded_addr_sk") + .set("wr_returning_customer_sk", "source.wr_returning_customer_sk") + .set("wr_returning_cdemo_sk", "source.wr_returning_cdemo_sk") + .set("wr_returning_hdemo_sk", "source.wr_returning_hdemo_sk") + .set("wr_returning_addr_sk", "source.wr_returning_addr_sk") + .set("wr_web_page_sk", "source.wr_web_page_sk") + .set("wr_reason_sk", "source.wr_reason_sk") + .set("wr_order_number", "source.wr_order_number") + .set("wr_return_quantity", "source.wr_return_quantity") + .set("wr_return_amt", "source.wr_return_amt") + .set("wr_return_tax", "source.wr_return_tax") + .set("wr_return_amt_inc_tax", "source.wr_return_amt_inc_tax") + .set("wr_fee", "source.wr_fee") + .set("wr_return_ship_cost", "source.wr_return_ship_cost") + .set("wr_refunded_cash", "source.wr_refunded_cash") + .set("wr_reversed_charge", "source.wr_reversed_charge") + .set("wr_account_credit", "source.wr_account_credit") + .set("wr_net_loss", "source.wr_net_loss") + }) +} + +pub fn merge_delete(source: DataFrame, table: DeltaTable) -> Result { + DeltaOps(table) + .merge( + source, + "source.wr_item_sk = target.wr_item_sk and source.wr_order_number = target.wr_order_number", + ) + .with_source_alias("source") + .with_target_alias("target") + .when_matched_delete(|delete| delete) +} + +pub async fn prepare_source_and_table( + params: &MergePerfParams, + tmp_dir: &TempDir, + parquet_dir: &Path, +) -> DeltaResult<(DataFrame, DeltaTable)> { + let ctx = SessionContext::new(); + + let parquet_path = parquet_dir + .join("web_returns.parquet") + .to_str() + .unwrap() + .to_owned(); + + let parquet_df = ctx + .read_parquet(&parquet_path, ParquetReadOptions::default()) + .await?; + let temp_table_url = Url::from_directory_path(tmp_dir).unwrap(); + + let schema = parquet_df.schema(); + let delta_schema: StructType = schema.as_arrow().try_into_kernel().unwrap(); + + let 
batches = parquet_df.collect().await?; + let fields: Vec = delta_schema.fields().cloned().collect(); + let table = DeltaOps::try_from_uri(temp_table_url) + .await? + .create() + .with_columns(fields) + .await?; + + let table = DeltaOps(table).write(batches).await?; + + let source = ctx + .read_parquet(&parquet_path, ParquetReadOptions::default()) + .await?; + + let matched = source + .clone() + .filter(expr_fn::random().lt_eq(lit(params.sample_matched_rows)))?; + + let rand = cast( + expr_fn::random() * lit(u32::MAX), + arrow::datatypes::DataType::Int64, + ); + let not_matched = source + .filter(expr_fn::random().lt_eq(lit(params.sample_not_matched_rows)))? + .with_column("wr_item_sk", rand.clone())? + .with_column("wr_order_number", rand)?; + + let source = matched.union(not_matched)?; + Ok((source, table)) +} diff --git a/crates/benchmarks/src/smoke.rs b/crates/benchmarks/src/smoke.rs new file mode 100644 index 0000000000..5f509dfaab --- /dev/null +++ b/crates/benchmarks/src/smoke.rs @@ -0,0 +1,87 @@ +use std::sync::Arc; + +use deltalake_core::arrow; +use deltalake_core::datafusion::prelude::SessionContext; +use deltalake_core::delta_datafusion::{DeltaScanConfigBuilder, DeltaTableProvider}; +use deltalake_core::protocol::SaveMode; +use deltalake_core::{DeltaOps, DeltaResult, DeltaTableError}; +use url::Url; + +#[derive(Debug, Clone)] +pub struct SmokeParams { + pub rows: usize, +} + +pub async fn run_smoke_once(table_url: &Url, params: &SmokeParams) -> DeltaResult<()> { + if params.rows > i32::MAX as usize { + return Err(DeltaTableError::generic( + "smoke benchmark supports at most i32::MAX rows", + )); + } + + let schema = Arc::new(arrow::datatypes::Schema::new(vec![ + arrow::datatypes::Field::new("id", arrow::datatypes::DataType::Int32, false), + arrow::datatypes::Field::new("value", arrow::datatypes::DataType::Utf8, false), + ])); + + let ids: Vec = (0..params.rows).map(|i| i as i32).collect(); + let values: Vec = ids.iter().map(|id| 
format!("value_{id}")).collect(); + + let batch = arrow::record_batch::RecordBatch::try_new( + schema, + vec![ + Arc::new(arrow::array::Int32Array::from(ids)), + Arc::new(arrow::array::StringArray::from(values)), + ], + )?; + + let table = DeltaOps::try_from_uri(table_url.clone()) + .await? + .write(vec![batch]) + .with_save_mode(SaveMode::Overwrite) + .await?; + + let snapshot = table.snapshot()?.snapshot().clone(); + let config = DeltaScanConfigBuilder::new().build(&snapshot)?; + let provider = DeltaTableProvider::try_new(snapshot, table.log_store(), config)?; + + let ctx = SessionContext::new(); + ctx.register_table("smoke", Arc::new(provider))?; + + let df = ctx.sql("SELECT id, value FROM smoke ORDER BY id").await?; + let batches = df.collect().await?; + + let mut total_rows = 0usize; + for batch in &batches { + let ids = batch + .column(0) + .as_any() + .downcast_ref::() + .ok_or_else(|| DeltaTableError::generic("unexpected column type for id"))?; + let values = batch + .column(1) + .as_any() + .downcast_ref::() + .ok_or_else(|| DeltaTableError::generic("unexpected column type for value"))?; + + for i in 0..batch.num_rows() { + let id = ids.value(i) as usize; + let expected_value = format!("value_{id}"); + if values.value(i) != expected_value { + return Err(DeltaTableError::generic( + "unexpected value returned from smoke table", + )); + } + } + total_rows += batch.num_rows(); + } + + if total_rows != params.rows { + return Err(DeltaTableError::generic(format!( + "expected {} rows, found {} in smoke table", + params.rows, total_rows + ))); + } + + Ok(()) +} From 2cca8c90e6ecfd6896938fa7139223ab4a9bf303 Mon Sep 17 00:00:00 2001 From: Abhi Agarwal Date: Mon, 13 Oct 2025 20:46:18 -0400 Subject: [PATCH 2/5] Extract merge benchmark to CLI Signed-off-by: Abhi Agarwal --- crates/benchmarks/Cargo.toml | 5 + crates/benchmarks/README.md | 27 +- crates/benchmarks/benches/merge.rs | 80 ++--- crates/benchmarks/src/lib.rs | 4 +- crates/benchmarks/src/main.rs | 114 ++++++-- 
crates/benchmarks/src/merge.rs | 455 +++++++++++++++++++++++------ 6 files changed, 506 insertions(+), 179 deletions(-) diff --git a/crates/benchmarks/Cargo.toml b/crates/benchmarks/Cargo.toml index 58c7aa11fd..f948f680e0 100644 --- a/crates/benchmarks/Cargo.toml +++ b/crates/benchmarks/Cargo.toml @@ -14,6 +14,7 @@ clap = { version = "4", features = ["derive"] } tokio = { workspace = true, features = ["fs", "macros", "rt", "io-util"] } url = { workspace = true } tempfile = { workspace = true } +anyhow = "1" [dependencies.deltalake-core] path = "../core" @@ -26,3 +27,7 @@ divan = "0.1" [[bench]] name = "merge" harness = false + +[[bench]] +name = "smoke" +harness = false diff --git a/crates/benchmarks/README.md b/crates/benchmarks/README.md index 523ecd1161..2cc181d94d 100644 --- a/crates/benchmarks/README.md +++ b/crates/benchmarks/README.md @@ -48,13 +48,34 @@ A simple CLI is available to run a single merge with configurable parameters (us Run (from repo root): ```bash -cargo run --profile profiling -p delta-benchmarks -- upsert --matched 0.01 --not-matched 0.10 +cargo run --profile profiling -p delta-benchmarks -- merge upsert --matched 0.01 --not-matched 0.10 ``` Options: -- `upsert | delete | insert`: operation to benchmark +- `upsert | delete | insert`: operation to benchmark (positional; required unless `--case` is given) - `--matched `: fraction of rows that match existing keys (default 0.01) - `--not-matched `: fraction of rows that do not match (default 0.10) +- `--case <name>`: run one of the predefined merge scenarios mirrored from the Delta Spark suite + +Run a predefined case with (an unknown case name makes the CLI print the available case names): +```bash +cargo run --release -p delta-benchmarks -- merge --case single_insert_only_filesMatchedFraction_0.05_rowsNotMatchedFraction_0.05 +``` + +## TPC-DS query helper + +All 99 TPC-DS SQL statements (matching the Spark benchmark suite) are stored under `queries/tpcds`.
The CLI can list or print them: + +```bash +cargo run --release -p delta-benchmarks -- tpcds --list +cargo run --release -p delta-benchmarks -- tpcds --case q1 +``` + +There is also a micro-benchmark that iterates over every query string to ensure the include paths stay wired correctly: + +```bash +cargo bench -p delta-benchmarks --bench tpcds +``` ### Flamegraphs using `samply` @@ -66,4 +87,4 @@ To start, cargo install samply --locked cargo build --profile profiling -p delta-benchmarks samply record ./target/profiling/delta-benchmarks upsert -``` \ No newline at end of file +``` diff --git a/crates/benchmarks/benches/merge.rs b/crates/benchmarks/benches/merge.rs index 5ef6660c4b..be72720f86 100644 --- a/crates/benchmarks/benches/merge.rs +++ b/crates/benchmarks/benches/merge.rs @@ -1,7 +1,7 @@ use std::path::PathBuf; use delta_benchmarks::{ - merge_delete, merge_insert, merge_upsert, prepare_source_and_table, MergeOp, MergePerfParams, + delete_only_cases, insert_only_cases, prepare_source_and_table, upsert_cases, MergeTestCase, }; use divan::{AllocProfiler, Bencher}; @@ -13,8 +13,11 @@ fn main() { #[global_allocator] static ALLOC: AllocProfiler = AllocProfiler::system(); -fn bench_merge(bencher: Bencher, op: MergeOp, params: &MergePerfParams) { +fn bench_merge_case(bencher: Bencher, case: &MergeTestCase) { let rt = tokio::runtime::Runtime::new().unwrap(); + let case_copy = *case; + let params = case_copy.params; + bencher .with_inputs(|| { let tmp_dir = tempfile::tempdir().unwrap(); @@ -23,74 +26,33 @@ fn bench_merge(bencher: Bencher, op: MergeOp, params: &MergePerfParams) { .unwrap_or_else(|_| "data/tpcds_parquet".to_string()), ); rt.block_on(async move { - let (source, table) = prepare_source_and_table(params, &tmp_dir, &parquet_dir) + let (source, table) = prepare_source_and_table(¶ms, &tmp_dir, &parquet_dir) .await - .unwrap(); - (source, table, tmp_dir) + .expect("prepare inputs"); + (case, source, table, tmp_dir) }) }) - .bench_local_values(|(source, table, 
tmp_dir)| { + .bench_local_values(|(case, source, table, tmp_dir)| { rt.block_on(async move { - let _ = divan::black_box(op(source, table).unwrap().await.unwrap()); + let (_, metrics) = case.execute(source, table).await.expect("execute merge"); + case.validate(&metrics).expect("validate merge"); + divan::black_box(metrics.num_target_rows_inserted); }); drop(tmp_dir); }); } -#[divan::bench(args = [ - MergePerfParams { - sample_matched_rows: 0.05, - sample_not_matched_rows: 0.0, - } -])] -fn delete_only(bencher: Bencher, params: &MergePerfParams) { - bench_merge(bencher, merge_delete, params); +#[divan::bench(args = insert_only_cases())] +fn insert_only(bencher: Bencher, case: &MergeTestCase) { + bench_merge_case(bencher, case); } -#[divan::bench(args = [ - MergePerfParams { - sample_matched_rows: 0.00, - sample_not_matched_rows: 0.05, - }, - MergePerfParams { - sample_matched_rows: 0.00, - sample_not_matched_rows: 0.50, - }, - MergePerfParams { - sample_matched_rows: 0.00, - sample_not_matched_rows: 1.0, - }, -])] -fn multiple_insert_only(bencher: Bencher, params: &MergePerfParams) { - bench_merge(bencher, merge_insert, params); +#[divan::bench(args = delete_only_cases())] +fn delete_only(bencher: Bencher, case: &MergeTestCase) { + bench_merge_case(bencher, case); } -#[divan::bench(args = [ - MergePerfParams { - sample_matched_rows: 0.01, - sample_not_matched_rows: 0.1, - }, - MergePerfParams { - sample_matched_rows: 0.1, - sample_not_matched_rows: 0.0, - }, - MergePerfParams { - sample_matched_rows: 0.1, - sample_not_matched_rows: 0.01, - }, - MergePerfParams { - sample_matched_rows: 0.5, - sample_not_matched_rows: 0.001, - }, - MergePerfParams { - sample_matched_rows: 0.99, - sample_not_matched_rows: 0.001, - }, - MergePerfParams { - sample_matched_rows: 0.001, - sample_not_matched_rows: 0.001, - }, -])] -fn upsert_file_matched(bencher: Bencher, params: &MergePerfParams) { - bench_merge(bencher, merge_upsert, params); +#[divan::bench(args = upsert_cases())] +fn 
upsert(bencher: Bencher, case: &MergeTestCase) { + bench_merge_case(bencher, case); } diff --git a/crates/benchmarks/src/lib.rs b/crates/benchmarks/src/lib.rs index e0cad5b356..3663e19651 100644 --- a/crates/benchmarks/src/lib.rs +++ b/crates/benchmarks/src/lib.rs @@ -2,6 +2,8 @@ pub mod merge; pub mod smoke; pub use merge::{ - merge_delete, merge_insert, merge_upsert, prepare_source_and_table, MergeOp, MergePerfParams, + delete_only_cases, insert_only_cases, merge_case_by_name, merge_case_names, merge_delete, + merge_insert, merge_test_cases, merge_upsert, prepare_source_and_table, upsert_cases, MergeOp, + MergePerfParams, MergeScenario, MergeTestCase, }; pub use smoke::{run_smoke_once, SmokeParams}; diff --git a/crates/benchmarks/src/main.rs b/crates/benchmarks/src/main.rs index d1accd04e3..e4e70e71f2 100644 --- a/crates/benchmarks/src/main.rs +++ b/crates/benchmarks/src/main.rs @@ -3,8 +3,8 @@ use std::{path::PathBuf, time::Instant}; use clap::{Parser, Subcommand, ValueEnum}; use delta_benchmarks::{ - merge_delete, merge_insert, merge_upsert, prepare_source_and_table, run_smoke_once, MergeOp, - MergePerfParams, SmokeParams, + merge_case_by_name, merge_case_names, merge_delete, merge_insert, merge_upsert, + prepare_source_and_table, run_smoke_once, MergeOp, MergePerfParams, MergeTestCase, SmokeParams, }; use deltalake_core::ensure_table_uri; @@ -28,7 +28,7 @@ enum Command { Merge { /// Operation to benchmark #[arg(value_enum)] - op: OpKind, + op: Option, /// Fraction of rows that match an existing key (0.0-1.0) #[arg(long, default_value_t = 0.01)] @@ -37,6 +37,10 @@ enum Command { /// Fraction of rows that do not match (0.0-1.0) #[arg(long, default_value_t = 0.10)] not_matched: f32, + + /// Named test case to run (overrides manual parameters) + #[arg(long)] + case: Option, }, /// Run the smoke workload to validate delta-rs read/write operations @@ -60,36 +64,36 @@ async fn main() -> anyhow::Result<()> { op, matched, not_matched, + case, } => { - let op_fn: 
MergeOp = match op { - OpKind::Upsert => merge_upsert, - OpKind::Delete => merge_delete, - OpKind::Insert => merge_insert, - }; - - let params = MergePerfParams { - sample_matched_rows: matched, - sample_not_matched_rows: not_matched, - }; - - let tmp_dir = tempfile::tempdir()?; - - let parquet_dir = PathBuf::from( - std::env::var("TPCDS_PARQUET_DIR") - .unwrap_or_else(|_| "crates/benchmarks/data/tpcds_parquet".to_string()), - ); - - let (source, table) = prepare_source_and_table(&params, &tmp_dir, &parquet_dir).await?; - - let start = Instant::now(); - let (_table, metrics) = op_fn(source, table)?.await?; - let elapsed = start.elapsed(); - - println!( - "merge_duration_ms={} metrics={:?}", - elapsed.as_millis(), - metrics - ); + if let Some(case_name) = case.as_deref() { + let merge_case = merge_case_by_name(case_name).ok_or_else(|| { + anyhow::anyhow!( + "unknown merge case '{}'. Available cases: {}", + case_name, + merge_case_names().join(", ") + ) + })?; + + run_merge_case(merge_case).await?; + } else { + let op = op.ok_or_else(|| { + anyhow::anyhow!("specify an operation (upsert/delete/insert) or provide --case") + })?; + + let op_fn: MergeOp = match op { + OpKind::Upsert => merge_upsert, + OpKind::Delete => merge_delete, + OpKind::Insert => merge_insert, + }; + + let params = MergePerfParams { + sample_matched_rows: matched, + sample_not_matched_rows: not_matched, + }; + + run_merge_with_params(op_fn, &params).await?; + } } Command::Smoke { rows, table_path } => { let params = SmokeParams { rows }; @@ -116,3 +120,49 @@ async fn main() -> anyhow::Result<()> { Ok(()) } + +async fn run_merge_with_params(op_fn: MergeOp, params: &MergePerfParams) -> anyhow::Result<()> { + let tmp_dir = tempfile::tempdir()?; + let parquet_dir = PathBuf::from( + std::env::var("TPCDS_PARQUET_DIR") + .unwrap_or_else(|_| "crates/benchmarks/data/tpcds_parquet".to_string()), + ); + + let (source, table) = prepare_source_and_table(params, &tmp_dir, &parquet_dir).await?; + + let start = 
Instant::now(); + let (_table, metrics) = op_fn(source, table)?.await?; + let elapsed = start.elapsed(); + + println!( + "merge_duration_ms={} metrics={:?}", + elapsed.as_millis(), + metrics + ); + + Ok(()) +} + +async fn run_merge_case(case: &MergeTestCase) -> anyhow::Result<()> { + let tmp_dir = tempfile::tempdir()?; + let parquet_dir = PathBuf::from( + std::env::var("TPCDS_PARQUET_DIR") + .unwrap_or_else(|_| "crates/benchmarks/data/tpcds_parquet".to_string()), + ); + + let (source, table) = prepare_source_and_table(&case.params, &tmp_dir, &parquet_dir).await?; + + let start = Instant::now(); + let (_table, metrics) = case.execute(source, table).await?; + case.validate(&metrics)?; + let elapsed = start.elapsed(); + + println!( + "merge_case={} merge_duration_ms={} metrics={:?}", + case.name, + elapsed.as_millis(), + metrics + ); + + Ok(()) +} diff --git a/crates/benchmarks/src/merge.rs b/crates/benchmarks/src/merge.rs index 430ede0f62..666d5fe97b 100644 --- a/crates/benchmarks/src/merge.rs +++ b/crates/benchmarks/src/merge.rs @@ -1,3 +1,4 @@ +use std::fmt; use std::path::Path; use deltalake_core::datafusion::functions::expr_fn; @@ -7,20 +8,367 @@ use deltalake_core::datafusion::{ }; use deltalake_core::kernel::engine::arrow_conversion::TryIntoKernel; use deltalake_core::kernel::{StructField, StructType}; -use deltalake_core::operations::merge::MergeBuilder; -use deltalake_core::DeltaTableError; -use deltalake_core::{arrow, DeltaOps, DeltaResult, DeltaTable}; +use deltalake_core::operations::merge::{InsertBuilder, MergeBuilder, MergeMetrics, UpdateBuilder}; +use deltalake_core::{arrow, DeltaOps, DeltaResult, DeltaTable, DeltaTableError}; use tempfile::TempDir; use url::Url; pub type MergeOp = fn(DataFrame, DeltaTable) -> Result; -#[derive(Debug, Clone)] +#[derive(Clone, Copy, Debug)] pub struct MergePerfParams { pub sample_matched_rows: f32, pub sample_not_matched_rows: f32, } +#[derive(Clone, Copy)] +pub enum MergeScenario { + SingleInsertOnly, + 
MultipleInsertOnly, + DeleteOnly, + Upsert, +} + +type MergeValidator = fn(&MergeMetrics, &MergeTestCase) -> DeltaResult<()>; + +#[derive(Clone, Copy)] +pub struct MergeTestCase { + pub name: &'static str, + pub scenario: MergeScenario, + pub params: MergePerfParams, + validator: MergeValidator, +} + +impl fmt::Debug for MergeTestCase { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("MergeTestCase") + .field("name", &self.name) + .finish_non_exhaustive() + } +} + +impl MergeTestCase { + fn builder( + &self, + source: DataFrame, + table: DeltaTable, + ) -> Result { + match self.scenario { + MergeScenario::SingleInsertOnly => merge_insert(source, table), + MergeScenario::MultipleInsertOnly => merge_multiple_insert(source, table), + MergeScenario::DeleteOnly => merge_delete(source, table), + MergeScenario::Upsert => merge_upsert(source, table), + } + } + + pub async fn execute( + &self, + source: DataFrame, + table: DeltaTable, + ) -> DeltaResult<(DeltaTable, MergeMetrics)> { + self.builder(source, table)?.await + } + + pub fn validate(&self, metrics: &MergeMetrics) -> DeltaResult<()> { + (self.validator)(metrics, self) + } +} + +fn validate_insert_only(metrics: &MergeMetrics, case: &MergeTestCase) -> DeltaResult<()> { + ensure_zero( + metrics.num_target_rows_updated, + "num_target_rows_updated", + case, + )?; + ensure_zero( + metrics.num_target_rows_deleted, + "num_target_rows_deleted", + case, + ) +} + +fn validate_delete_only(metrics: &MergeMetrics, case: &MergeTestCase) -> DeltaResult<()> { + ensure_zero( + metrics.num_target_rows_inserted, + "num_target_rows_inserted", + case, + )?; + ensure_zero( + metrics.num_target_rows_updated, + "num_target_rows_updated", + case, + ) +} + +fn validate_upsert(metrics: &MergeMetrics, case: &MergeTestCase) -> DeltaResult<()> { + ensure_zero( + metrics.num_target_rows_deleted, + "num_target_rows_deleted", + case, + ) +} + +fn ensure_zero(value: usize, field: &str, case: &MergeTestCase) -> 
DeltaResult<()> { + if value == 0 { + Ok(()) + } else { + Err(DeltaTableError::generic(format!( + "case '{}' expected {} == 0, found {}", + case.name, field, value + ))) + } +} + +const INSERT_ONLY_CASES: [MergeTestCase; 6] = [ + MergeTestCase { + name: "single_insert_only_filesMatchedFraction_0.05_rowsNotMatchedFraction_0.05", + scenario: MergeScenario::SingleInsertOnly, + params: MergePerfParams { + sample_matched_rows: 0.0, + sample_not_matched_rows: 0.05, + }, + validator: validate_insert_only, + }, + MergeTestCase { + name: "single_insert_only_filesMatchedFraction_0.05_rowsNotMatchedFraction_0.5", + scenario: MergeScenario::SingleInsertOnly, + params: MergePerfParams { + sample_matched_rows: 0.0, + sample_not_matched_rows: 0.5, + }, + validator: validate_insert_only, + }, + MergeTestCase { + name: "single_insert_only_filesMatchedFraction_0.05_rowsNotMatchedFraction_1.0", + scenario: MergeScenario::SingleInsertOnly, + params: MergePerfParams { + sample_matched_rows: 0.0, + sample_not_matched_rows: 1.0, + }, + validator: validate_insert_only, + }, + MergeTestCase { + name: "multiple_insert_only_filesMatchedFraction_0.05_rowsNotMatchedFraction_0.05", + scenario: MergeScenario::MultipleInsertOnly, + params: MergePerfParams { + sample_matched_rows: 0.0, + sample_not_matched_rows: 0.05, + }, + validator: validate_insert_only, + }, + MergeTestCase { + name: "multiple_insert_only_filesMatchedFraction_0.05_rowsNotMatchedFraction_0.5", + scenario: MergeScenario::MultipleInsertOnly, + params: MergePerfParams { + sample_matched_rows: 0.0, + sample_not_matched_rows: 0.5, + }, + validator: validate_insert_only, + }, + MergeTestCase { + name: "multiple_insert_only_filesMatchedFraction_0.05_rowsNotMatchedFraction_1.0", + scenario: MergeScenario::MultipleInsertOnly, + params: MergePerfParams { + sample_matched_rows: 0.0, + sample_not_matched_rows: 1.0, + }, + validator: validate_insert_only, + }, +]; + +const DELETE_ONLY_CASES: [MergeTestCase; 1] = [MergeTestCase { + name: 
"delete_only_filesMatchedFraction_0.05_rowsMatchedFraction_0.05", + scenario: MergeScenario::DeleteOnly, + params: MergePerfParams { + sample_matched_rows: 0.05, + sample_not_matched_rows: 0.0, + }, + validator: validate_delete_only, +}]; + +const UPSERT_CASES: [MergeTestCase; 9] = [ + MergeTestCase { + name: "upsert_filesMatchedFraction_0.05_rowsMatchedFraction_0.0_rowsNotMatchedFraction_0.1", + scenario: MergeScenario::Upsert, + params: MergePerfParams { + sample_matched_rows: 0.0, + sample_not_matched_rows: 0.1, + }, + validator: validate_upsert, + }, + MergeTestCase { + name: + "upsert_filesMatchedFraction_0.05_rowsMatchedFraction_0.01_rowsNotMatchedFraction_0.1", + scenario: MergeScenario::Upsert, + params: MergePerfParams { + sample_matched_rows: 0.01, + sample_not_matched_rows: 0.1, + }, + validator: validate_upsert, + }, + MergeTestCase { + name: "upsert_filesMatchedFraction_0.05_rowsMatchedFraction_0.1_rowsNotMatchedFraction_0.1", + scenario: MergeScenario::Upsert, + params: MergePerfParams { + sample_matched_rows: 0.1, + sample_not_matched_rows: 0.1, + }, + validator: validate_upsert, + }, + MergeTestCase { + name: + "upsert_filesMatchedFraction_0.05_rowsMatchedFraction_0.5_rowsNotMatchedFraction_0.001", + scenario: MergeScenario::Upsert, + params: MergePerfParams { + sample_matched_rows: 0.5, + sample_not_matched_rows: 0.001, + }, + validator: validate_upsert, + }, + MergeTestCase { + name: + "upsert_filesMatchedFraction_0.05_rowsMatchedFraction_0.99_rowsNotMatchedFraction_0.001", + scenario: MergeScenario::Upsert, + params: MergePerfParams { + sample_matched_rows: 0.99, + sample_not_matched_rows: 0.001, + }, + validator: validate_upsert, + }, + MergeTestCase { + name: + "upsert_filesMatchedFraction_0.05_rowsMatchedFraction_1.0_rowsNotMatchedFraction_0.001", + scenario: MergeScenario::Upsert, + params: MergePerfParams { + sample_matched_rows: 1.0, + sample_not_matched_rows: 0.001, + }, + validator: validate_upsert, + }, + MergeTestCase { + name: 
"upsert_filesMatchedFraction_0.05_rowsMatchedFraction_0.1_rowsNotMatchedFraction_0.0", + scenario: MergeScenario::Upsert, + params: MergePerfParams { + sample_matched_rows: 0.1, + sample_not_matched_rows: 0.0, + }, + validator: validate_upsert, + }, + MergeTestCase { + name: + "upsert_filesMatchedFraction_0.5_rowsMatchedFraction_0.01_rowsNotMatchedFraction_0.001", + scenario: MergeScenario::Upsert, + params: MergePerfParams { + sample_matched_rows: 0.01, + sample_not_matched_rows: 0.001, + }, + validator: validate_upsert, + }, + MergeTestCase { + name: + "upsert_filesMatchedFraction_1.0_rowsMatchedFraction_0.01_rowsNotMatchedFraction_0.001", + scenario: MergeScenario::Upsert, + params: MergePerfParams { + sample_matched_rows: 0.01, + sample_not_matched_rows: 0.001, + }, + validator: validate_upsert, + }, +]; + +fn all_cases_iter() -> impl Iterator { + INSERT_ONLY_CASES + .iter() + .chain(DELETE_ONLY_CASES.iter()) + .chain(UPSERT_CASES.iter()) +} + +pub fn insert_only_cases() -> &'static [MergeTestCase] { + &INSERT_ONLY_CASES +} + +pub fn delete_only_cases() -> &'static [MergeTestCase] { + &DELETE_ONLY_CASES +} + +pub fn upsert_cases() -> &'static [MergeTestCase] { + &UPSERT_CASES +} + +pub fn merge_case_names() -> Vec<&'static str> { + all_cases_iter().map(|case| case.name).collect() +} + +pub fn merge_case_by_name(name: &str) -> Option<&'static MergeTestCase> { + all_cases_iter().find(|case| case.name.eq_ignore_ascii_case(name)) +} + +pub fn merge_test_cases() -> Vec<&'static MergeTestCase> { + all_cases_iter().collect() +} + +fn apply_insert_projection(builder: InsertBuilder) -> InsertBuilder { + builder + .set("wr_returned_date_sk", "source.wr_returned_date_sk") + .set("wr_returned_time_sk", "source.wr_returned_time_sk") + .set("wr_item_sk", "source.wr_item_sk") + .set("wr_refunded_customer_sk", "source.wr_refunded_customer_sk") + .set("wr_refunded_cdemo_sk", "source.wr_refunded_cdemo_sk") + .set("wr_refunded_hdemo_sk", "source.wr_refunded_hdemo_sk") + 
.set("wr_refunded_addr_sk", "source.wr_refunded_addr_sk") + .set( + "wr_returning_customer_sk", + "source.wr_returning_customer_sk", + ) + .set("wr_returning_cdemo_sk", "source.wr_returning_cdemo_sk") + .set("wr_returning_hdemo_sk", "source.wr_returning_hdemo_sk") + .set("wr_returning_addr_sk", "source.wr_returning_addr_sk") + .set("wr_web_page_sk", "source.wr_web_page_sk") + .set("wr_reason_sk", "source.wr_reason_sk") + .set("wr_order_number", "source.wr_order_number") + .set("wr_return_quantity", "source.wr_return_quantity") + .set("wr_return_amt", "source.wr_return_amt") + .set("wr_return_tax", "source.wr_return_tax") + .set("wr_return_amt_inc_tax", "source.wr_return_amt_inc_tax") + .set("wr_fee", "source.wr_fee") + .set("wr_return_ship_cost", "source.wr_return_ship_cost") + .set("wr_refunded_cash", "source.wr_refunded_cash") + .set("wr_reversed_charge", "source.wr_reversed_charge") + .set("wr_account_credit", "source.wr_account_credit") + .set("wr_net_loss", "source.wr_net_loss") +} + +fn apply_update_projection(builder: UpdateBuilder) -> UpdateBuilder { + builder + .update("wr_returned_date_sk", "source.wr_returned_date_sk") + .update("wr_returned_time_sk", "source.wr_returned_time_sk") + .update("wr_item_sk", "source.wr_item_sk") + .update("wr_refunded_customer_sk", "source.wr_refunded_customer_sk") + .update("wr_refunded_cdemo_sk", "source.wr_refunded_cdemo_sk") + .update("wr_refunded_hdemo_sk", "source.wr_refunded_hdemo_sk") + .update("wr_refunded_addr_sk", "source.wr_refunded_addr_sk") + .update( + "wr_returning_customer_sk", + "source.wr_returning_customer_sk", + ) + .update("wr_returning_cdemo_sk", "source.wr_returning_cdemo_sk") + .update("wr_returning_hdemo_sk", "source.wr_returning_hdemo_sk") + .update("wr_returning_addr_sk", "source.wr_returning_addr_sk") + .update("wr_web_page_sk", "source.wr_web_page_sk") + .update("wr_reason_sk", "source.wr_reason_sk") + .update("wr_order_number", "source.wr_order_number") + .update("wr_return_quantity", 
"source.wr_return_quantity") + .update("wr_return_amt", "source.wr_return_amt") + .update("wr_return_tax", "source.wr_return_tax") + .update("wr_return_amt_inc_tax", "source.wr_return_amt_inc_tax") + .update("wr_fee", "source.wr_fee") + .update("wr_return_ship_cost", "source.wr_return_ship_cost") + .update("wr_refunded_cash", "source.wr_refunded_cash") + .update("wr_reversed_charge", "source.wr_reversed_charge") + .update("wr_account_credit", "source.wr_account_credit") + .update("wr_net_loss", "source.wr_net_loss") +} + pub fn merge_upsert(source: DataFrame, table: DeltaTable) -> Result { DeltaOps(table) .merge( @@ -29,63 +377,25 @@ pub fn merge_upsert(source: DataFrame, table: DeltaTable) -> Result Result { + DeltaOps(table) + .merge( + source, + "source.wr_item_sk = target.wr_item_sk and source.wr_order_number = target.wr_order_number", + ) + .with_source_alias("source") + .with_target_alias("target") + .when_not_matched_insert(|insert| apply_insert_projection(insert)) +} + +fn merge_multiple_insert( + source: DataFrame, + table: DeltaTable, +) -> Result { DeltaOps(table) .merge( source, @@ -94,32 +404,9 @@ pub fn merge_insert(source: DataFrame, table: DeltaTable) -> Result Result { From 1913ade149517bb8577cc11fb8634f475d7ef1bc Mon Sep 17 00:00:00 2001 From: Abhi Agarwal Date: Mon, 13 Oct 2025 21:49:21 -0400 Subject: [PATCH 3/5] Port TPC-DS benchmarks Signed-off-by: Abhi Agarwal --- crates/benchmarks/Cargo.toml | 4 + crates/benchmarks/README.md | 15 -- crates/benchmarks/benches/tpcds.rs | 42 ++++ crates/benchmarks/queries/README.md | 1 + crates/benchmarks/queries/tpcds/q1.sql | 26 +++ crates/benchmarks/queries/tpcds/q10.sql | 60 +++++ crates/benchmarks/queries/tpcds/q11.sql | 82 +++++++ crates/benchmarks/queries/tpcds/q12.sql | 35 +++ crates/benchmarks/queries/tpcds/q13.sql | 53 +++++ crates/benchmarks/queries/tpcds/q14.sql | 211 +++++++++++++++++ crates/benchmarks/queries/tpcds/q15.sql | 21 ++ crates/benchmarks/queries/tpcds/q16.sql | 32 +++ 
crates/benchmarks/queries/tpcds/q17.sql | 46 ++++ crates/benchmarks/queries/tpcds/q18.sql | 35 +++ crates/benchmarks/queries/tpcds/q19.sql | 26 +++ crates/benchmarks/queries/tpcds/q2.sql | 61 +++++ crates/benchmarks/queries/tpcds/q20.sql | 31 +++ crates/benchmarks/queries/tpcds/q21.sql | 31 +++ crates/benchmarks/queries/tpcds/q22.sql | 21 ++ crates/benchmarks/queries/tpcds/q23.sql | 108 +++++++++ crates/benchmarks/queries/tpcds/q24.sql | 108 +++++++++ crates/benchmarks/queries/tpcds/q25.sql | 49 ++++ crates/benchmarks/queries/tpcds/q26.sql | 22 ++ crates/benchmarks/queries/tpcds/q27.sql | 24 ++ crates/benchmarks/queries/tpcds/q28.sql | 54 +++++ crates/benchmarks/queries/tpcds/q29.sql | 48 ++++ crates/benchmarks/queries/tpcds/q3.sql | 22 ++ crates/benchmarks/queries/tpcds/q30.sql | 32 +++ crates/benchmarks/queries/tpcds/q31.sql | 53 +++++ crates/benchmarks/queries/tpcds/q32.sql | 29 +++ crates/benchmarks/queries/tpcds/q33.sql | 76 ++++++ crates/benchmarks/queries/tpcds/q34.sql | 32 +++ crates/benchmarks/queries/tpcds/q35.sql | 59 +++++ crates/benchmarks/queries/tpcds/q36.sql | 31 +++ crates/benchmarks/queries/tpcds/q37.sql | 18 ++ crates/benchmarks/queries/tpcds/q38.sql | 24 ++ crates/benchmarks/queries/tpcds/q39.sql | 55 +++++ crates/benchmarks/queries/tpcds/q4.sql | 117 ++++++++++ crates/benchmarks/queries/tpcds/q40.sql | 29 +++ crates/benchmarks/queries/tpcds/q41.sql | 53 +++++ crates/benchmarks/queries/tpcds/q42.sql | 23 ++ crates/benchmarks/queries/tpcds/q43.sql | 20 ++ crates/benchmarks/queries/tpcds/q44.sql | 36 +++ crates/benchmarks/queries/tpcds/q45.sql | 21 ++ crates/benchmarks/queries/tpcds/q46.sql | 36 +++ crates/benchmarks/queries/tpcds/q47.sql | 52 +++++ crates/benchmarks/queries/tpcds/q48.sql | 68 ++++++ crates/benchmarks/queries/tpcds/q49.sql | 130 +++++++++++ crates/benchmarks/queries/tpcds/q5.sql | 129 ++++++++++ crates/benchmarks/queries/tpcds/q50.sql | 60 +++++ crates/benchmarks/queries/tpcds/q51.sql | 46 ++++ 
crates/benchmarks/queries/tpcds/q52.sql | 23 ++ crates/benchmarks/queries/tpcds/q53.sql | 29 +++ crates/benchmarks/queries/tpcds/q54.sql | 57 +++++ crates/benchmarks/queries/tpcds/q55.sql | 15 ++ crates/benchmarks/queries/tpcds/q56.sql | 70 ++++++ crates/benchmarks/queries/tpcds/q57.sql | 49 ++++ crates/benchmarks/queries/tpcds/q58.sql | 66 ++++++ crates/benchmarks/queries/tpcds/q59.sql | 45 ++++ crates/benchmarks/queries/tpcds/q6.sql | 27 +++ crates/benchmarks/queries/tpcds/q60.sql | 79 +++++++ crates/benchmarks/queries/tpcds/q61.sql | 45 ++++ crates/benchmarks/queries/tpcds/q62.sql | 36 +++ crates/benchmarks/queries/tpcds/q63.sql | 30 +++ crates/benchmarks/queries/tpcds/q64.sql | 122 ++++++++++ crates/benchmarks/queries/tpcds/q65.sql | 30 +++ crates/benchmarks/queries/tpcds/q66.sql | 221 ++++++++++++++++++ crates/benchmarks/queries/tpcds/q67.sql | 45 ++++ crates/benchmarks/queries/tpcds/q68.sql | 43 ++++ crates/benchmarks/queries/tpcds/q69.sql | 48 ++++ crates/benchmarks/queries/tpcds/q7.sql | 22 ++ crates/benchmarks/queries/tpcds/q70.sql | 39 ++++ crates/benchmarks/queries/tpcds/q71.sql | 41 ++++ crates/benchmarks/queries/tpcds/q72.sql | 30 +++ .../queries/tpcds/q72_optimized.sql | 32 +++ crates/benchmarks/queries/tpcds/q73.sql | 29 +++ crates/benchmarks/queries/tpcds/q74.sql | 62 +++++ crates/benchmarks/queries/tpcds/q75.sql | 71 ++++++ crates/benchmarks/queries/tpcds/q76.sql | 25 ++ crates/benchmarks/queries/tpcds/q77.sql | 109 +++++++++ crates/benchmarks/queries/tpcds/q78.sql | 59 +++++ crates/benchmarks/queries/tpcds/q79.sql | 24 ++ crates/benchmarks/queries/tpcds/q8.sql | 109 +++++++++ crates/benchmarks/queries/tpcds/q80.sql | 97 ++++++++ crates/benchmarks/queries/tpcds/q81.sql | 32 +++ crates/benchmarks/queries/tpcds/q82.sql | 18 ++ crates/benchmarks/queries/tpcds/q83.sql | 68 ++++++ crates/benchmarks/queries/tpcds/q84.sql | 22 ++ crates/benchmarks/queries/tpcds/q85.sql | 85 +++++++ crates/benchmarks/queries/tpcds/q86.sql | 27 +++ 
crates/benchmarks/queries/tpcds/q87.sql | 24 ++ crates/benchmarks/queries/tpcds/q88.sql | 95 ++++++++ crates/benchmarks/queries/tpcds/q89.sql | 29 +++ crates/benchmarks/queries/tpcds/q9.sql | 52 +++++ crates/benchmarks/queries/tpcds/q90.sql | 23 ++ crates/benchmarks/queries/tpcds/q91.sql | 32 +++ crates/benchmarks/queries/tpcds/q92.sql | 31 +++ crates/benchmarks/queries/tpcds/q93.sql | 19 ++ crates/benchmarks/queries/tpcds/q94.sql | 30 +++ crates/benchmarks/queries/tpcds/q95.sql | 33 +++ crates/benchmarks/queries/tpcds/q96.sql | 17 ++ crates/benchmarks/queries/tpcds/q97.sql | 26 +++ crates/benchmarks/queries/tpcds/q98.sql | 34 +++ crates/benchmarks/queries/tpcds/q99.sql | 36 +++ crates/benchmarks/src/lib.rs | 4 + crates/benchmarks/src/main.rs | 75 +++++- crates/benchmarks/src/merge.rs | 8 +- crates/benchmarks/src/tpcds_queries.rs | 214 +++++++++++++++++ 108 files changed, 5340 insertions(+), 20 deletions(-) create mode 100644 crates/benchmarks/benches/tpcds.rs create mode 100644 crates/benchmarks/queries/README.md create mode 100644 crates/benchmarks/queries/tpcds/q1.sql create mode 100644 crates/benchmarks/queries/tpcds/q10.sql create mode 100644 crates/benchmarks/queries/tpcds/q11.sql create mode 100644 crates/benchmarks/queries/tpcds/q12.sql create mode 100644 crates/benchmarks/queries/tpcds/q13.sql create mode 100644 crates/benchmarks/queries/tpcds/q14.sql create mode 100644 crates/benchmarks/queries/tpcds/q15.sql create mode 100644 crates/benchmarks/queries/tpcds/q16.sql create mode 100644 crates/benchmarks/queries/tpcds/q17.sql create mode 100644 crates/benchmarks/queries/tpcds/q18.sql create mode 100644 crates/benchmarks/queries/tpcds/q19.sql create mode 100644 crates/benchmarks/queries/tpcds/q2.sql create mode 100644 crates/benchmarks/queries/tpcds/q20.sql create mode 100644 crates/benchmarks/queries/tpcds/q21.sql create mode 100644 crates/benchmarks/queries/tpcds/q22.sql create mode 100644 crates/benchmarks/queries/tpcds/q23.sql create mode 100644 
crates/benchmarks/queries/tpcds/q24.sql create mode 100644 crates/benchmarks/queries/tpcds/q25.sql create mode 100644 crates/benchmarks/queries/tpcds/q26.sql create mode 100644 crates/benchmarks/queries/tpcds/q27.sql create mode 100644 crates/benchmarks/queries/tpcds/q28.sql create mode 100644 crates/benchmarks/queries/tpcds/q29.sql create mode 100644 crates/benchmarks/queries/tpcds/q3.sql create mode 100644 crates/benchmarks/queries/tpcds/q30.sql create mode 100644 crates/benchmarks/queries/tpcds/q31.sql create mode 100644 crates/benchmarks/queries/tpcds/q32.sql create mode 100644 crates/benchmarks/queries/tpcds/q33.sql create mode 100644 crates/benchmarks/queries/tpcds/q34.sql create mode 100644 crates/benchmarks/queries/tpcds/q35.sql create mode 100644 crates/benchmarks/queries/tpcds/q36.sql create mode 100644 crates/benchmarks/queries/tpcds/q37.sql create mode 100644 crates/benchmarks/queries/tpcds/q38.sql create mode 100644 crates/benchmarks/queries/tpcds/q39.sql create mode 100644 crates/benchmarks/queries/tpcds/q4.sql create mode 100644 crates/benchmarks/queries/tpcds/q40.sql create mode 100644 crates/benchmarks/queries/tpcds/q41.sql create mode 100644 crates/benchmarks/queries/tpcds/q42.sql create mode 100644 crates/benchmarks/queries/tpcds/q43.sql create mode 100644 crates/benchmarks/queries/tpcds/q44.sql create mode 100644 crates/benchmarks/queries/tpcds/q45.sql create mode 100644 crates/benchmarks/queries/tpcds/q46.sql create mode 100644 crates/benchmarks/queries/tpcds/q47.sql create mode 100644 crates/benchmarks/queries/tpcds/q48.sql create mode 100644 crates/benchmarks/queries/tpcds/q49.sql create mode 100644 crates/benchmarks/queries/tpcds/q5.sql create mode 100644 crates/benchmarks/queries/tpcds/q50.sql create mode 100644 crates/benchmarks/queries/tpcds/q51.sql create mode 100644 crates/benchmarks/queries/tpcds/q52.sql create mode 100644 crates/benchmarks/queries/tpcds/q53.sql create mode 100644 crates/benchmarks/queries/tpcds/q54.sql create mode 
100644 crates/benchmarks/queries/tpcds/q55.sql create mode 100644 crates/benchmarks/queries/tpcds/q56.sql create mode 100644 crates/benchmarks/queries/tpcds/q57.sql create mode 100644 crates/benchmarks/queries/tpcds/q58.sql create mode 100644 crates/benchmarks/queries/tpcds/q59.sql create mode 100644 crates/benchmarks/queries/tpcds/q6.sql create mode 100644 crates/benchmarks/queries/tpcds/q60.sql create mode 100644 crates/benchmarks/queries/tpcds/q61.sql create mode 100644 crates/benchmarks/queries/tpcds/q62.sql create mode 100644 crates/benchmarks/queries/tpcds/q63.sql create mode 100644 crates/benchmarks/queries/tpcds/q64.sql create mode 100644 crates/benchmarks/queries/tpcds/q65.sql create mode 100644 crates/benchmarks/queries/tpcds/q66.sql create mode 100644 crates/benchmarks/queries/tpcds/q67.sql create mode 100644 crates/benchmarks/queries/tpcds/q68.sql create mode 100644 crates/benchmarks/queries/tpcds/q69.sql create mode 100644 crates/benchmarks/queries/tpcds/q7.sql create mode 100644 crates/benchmarks/queries/tpcds/q70.sql create mode 100644 crates/benchmarks/queries/tpcds/q71.sql create mode 100644 crates/benchmarks/queries/tpcds/q72.sql create mode 100644 crates/benchmarks/queries/tpcds/q72_optimized.sql create mode 100644 crates/benchmarks/queries/tpcds/q73.sql create mode 100644 crates/benchmarks/queries/tpcds/q74.sql create mode 100644 crates/benchmarks/queries/tpcds/q75.sql create mode 100644 crates/benchmarks/queries/tpcds/q76.sql create mode 100644 crates/benchmarks/queries/tpcds/q77.sql create mode 100644 crates/benchmarks/queries/tpcds/q78.sql create mode 100644 crates/benchmarks/queries/tpcds/q79.sql create mode 100644 crates/benchmarks/queries/tpcds/q8.sql create mode 100644 crates/benchmarks/queries/tpcds/q80.sql create mode 100644 crates/benchmarks/queries/tpcds/q81.sql create mode 100644 crates/benchmarks/queries/tpcds/q82.sql create mode 100644 crates/benchmarks/queries/tpcds/q83.sql create mode 100644 
crates/benchmarks/queries/tpcds/q84.sql create mode 100644 crates/benchmarks/queries/tpcds/q85.sql create mode 100644 crates/benchmarks/queries/tpcds/q86.sql create mode 100644 crates/benchmarks/queries/tpcds/q87.sql create mode 100644 crates/benchmarks/queries/tpcds/q88.sql create mode 100644 crates/benchmarks/queries/tpcds/q89.sql create mode 100644 crates/benchmarks/queries/tpcds/q9.sql create mode 100644 crates/benchmarks/queries/tpcds/q90.sql create mode 100644 crates/benchmarks/queries/tpcds/q91.sql create mode 100644 crates/benchmarks/queries/tpcds/q92.sql create mode 100644 crates/benchmarks/queries/tpcds/q93.sql create mode 100644 crates/benchmarks/queries/tpcds/q94.sql create mode 100644 crates/benchmarks/queries/tpcds/q95.sql create mode 100644 crates/benchmarks/queries/tpcds/q96.sql create mode 100644 crates/benchmarks/queries/tpcds/q97.sql create mode 100644 crates/benchmarks/queries/tpcds/q98.sql create mode 100644 crates/benchmarks/queries/tpcds/q99.sql create mode 100644 crates/benchmarks/src/tpcds_queries.rs diff --git a/crates/benchmarks/Cargo.toml b/crates/benchmarks/Cargo.toml index f948f680e0..f8b99a7037 100644 --- a/crates/benchmarks/Cargo.toml +++ b/crates/benchmarks/Cargo.toml @@ -31,3 +31,7 @@ harness = false [[bench]] name = "smoke" harness = false + +[[bench]] +name = "tpcds" +harness = false diff --git a/crates/benchmarks/README.md b/crates/benchmarks/README.md index 2cc181d94d..506730ddab 100644 --- a/crates/benchmarks/README.md +++ b/crates/benchmarks/README.md @@ -62,21 +62,6 @@ List cases with: cargo run --release -p delta-benchmarks -- merge --case single_insert_only_filesMatchedFraction_0.05_rowsNotMatchedFraction_0.05 ``` -## TPC-DS query helper - -All 99 TPC-DS SQL statements (matching the Spark benchmark suite) are stored under `queries/tpcds`. 
The CLI can list or print them: - -```bash -cargo run --release -p delta-benchmarks -- tpcds --list -cargo run --release -p delta-benchmarks -- tpcds --case q1 -``` - -There is also a micro-benchmark that iterates over every query string to ensure the include paths stay wired correctly: - -```bash -cargo bench -p delta-benchmarks --bench tpcds -``` - ### Flamegraphs using `samply` Using `samply`, you can generate flamegraphs from the profile script. diff --git a/crates/benchmarks/benches/tpcds.rs b/crates/benchmarks/benches/tpcds.rs new file mode 100644 index 0000000000..9fc3c17db5 --- /dev/null +++ b/crates/benchmarks/benches/tpcds.rs @@ -0,0 +1,42 @@ +use std::path::PathBuf; + +use delta_benchmarks::{register_tpcds_tables, tpcds_query, tpcds_query_names}; +use divan::{AllocProfiler, Bencher}; + +fn main() { + divan::main(); +} + +#[global_allocator] +static ALLOC: AllocProfiler = AllocProfiler::system(); + +#[divan::bench(args = tpcds_query_names())] +fn tpcds_query_execution(bencher: Bencher, name: &'static str) { + let rt = tokio::runtime::Runtime::new().unwrap(); + let sql = tpcds_query(name) + .expect("query must exist") + .split(";") + .filter(|s| !s.trim().is_empty()) + .collect::>(); + + let tmp_dir = tempfile::tempdir().unwrap(); + let parquet_dir = PathBuf::from( + std::env::var("TPCDS_PARQUET_DIR").unwrap_or_else(|_| "data/tpcds_parquet".to_string()), + ); + + let ctx = rt.block_on(async { + register_tpcds_tables(&tmp_dir, &parquet_dir) + .await + .expect("failed to register TPC-DS tables") + }); + + bencher.bench_local(|| { + rt.block_on(async { + for sql in sql.iter() { + let df = ctx.sql(sql).await.expect("failed to create dataframe"); + divan::black_box(df.collect().await.expect("failed to execute query")); + } + }); + }); + drop(tmp_dir); +} diff --git a/crates/benchmarks/queries/README.md b/crates/benchmarks/queries/README.md new file mode 100644 index 0000000000..824d5d845a --- /dev/null +++ b/crates/benchmarks/queries/README.md @@ -0,0 +1 @@ 
+TPC-DS SQL is vendored from the [datafusion-benchmarks](https://github.com/apache/datafusion-benchmarks) repository. \ No newline at end of file diff --git a/crates/benchmarks/queries/tpcds/q1.sql b/crates/benchmarks/queries/tpcds/q1.sql new file mode 100644 index 0000000000..00328875ab --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q1.sql @@ -0,0 +1,26 @@ +-- SQLBench-DS query 1 derived from TPC-DS query 1 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +with customer_total_return as +(select sr_customer_sk as ctr_customer_sk +,sr_store_sk as ctr_store_sk +,sum(SR_RETURN_AMT_INC_TAX) as ctr_total_return +from store_returns +,date_dim +where sr_returned_date_sk = d_date_sk +and d_year =1999 +group by sr_customer_sk +,sr_store_sk) + select c_customer_id +from customer_total_return ctr1 +,store +,customer +where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 +from customer_total_return ctr2 +where ctr1.ctr_store_sk = ctr2.ctr_store_sk) +and s_store_sk = ctr1.ctr_store_sk +and s_state = 'TN' +and ctr1.ctr_customer_sk = c_customer_sk +order by c_customer_id + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q10.sql b/crates/benchmarks/queries/tpcds/q10.sql new file mode 100644 index 0000000000..3a47920e04 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q10.sql @@ -0,0 +1,60 @@ +-- SQLBench-DS query 10 derived from TPC-DS query 10 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3, + cd_dep_count, + count(*) cnt4, + cd_dep_employed_count, + count(*) cnt5, + cd_dep_college_count, + count(*) cnt6 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_county in ('Clinton County','Platte County','Franklin County','Louisa County','Harmon County') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 3 and 3+3) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 3 ANd 3+3) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 3 and 3+3)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q11.sql b/crates/benchmarks/queries/tpcds/q11.sql new file mode 100644 index 0000000000..7ffd3094f9 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q11.sql @@ -0,0 +1,82 @@ +-- SQLBench-DS query 11 derived from TPC-DS query 11 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_email_address + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 + and 
t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_email_address + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q12.sql b/crates/benchmarks/queries/tpcds/q12.sql new file mode 100644 index 0000000000..eb267ca64b --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q12.sql @@ -0,0 +1,35 @@ +-- SQLBench-DS query 12 derived from TPC-DS query 12 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ws_ext_sales_price) as itemrevenue + ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over + (partition by i_class) as revenueratio +from + web_sales + ,item + ,date_dim +where + ws_item_sk = i_item_sk + and i_category in ('Jewelry', 'Books', 'Women') + and ws_sold_date_sk = d_date_sk + and d_date between cast('2002-03-22' as date) + and (cast('2002-03-22' as date) + INTERVAL '30 DAYS') +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q13.sql b/crates/benchmarks/queries/tpcds/q13.sql new file mode 100644 index 0000000000..31b1171b9e --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q13.sql @@ -0,0 +1,53 @@ +-- SQLBench-DS query 13 derived from TPC-DS query 13 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. 
+-- This query was generated at scale factor 1. +select avg(ss_quantity) + ,avg(ss_ext_sales_price) + ,avg(ss_ext_wholesale_cost) + ,sum(ss_ext_wholesale_cost) + from store_sales + ,store + ,customer_demographics + ,household_demographics + ,customer_address + ,date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2001 + and((ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'U' + and cd_education_status = '4 yr Degree' + and ss_sales_price between 100.00 and 150.00 + and hd_dep_count = 3 + )or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'S' + and cd_education_status = 'Unknown' + and ss_sales_price between 50.00 and 100.00 + and hd_dep_count = 1 + ) or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'D' + and cd_education_status = '2 yr Degree' + and ss_sales_price between 150.00 and 200.00 + and hd_dep_count = 1 + )) + and((ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('CO', 'MI', 'MN') + and ss_net_profit between 100 and 200 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('NC', 'NY', 'TX') + and ss_net_profit between 150 and 300 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('CA', 'NE', 'TN') + and ss_net_profit between 50 and 250 + )) +; + diff --git a/crates/benchmarks/queries/tpcds/q14.sql b/crates/benchmarks/queries/tpcds/q14.sql new file mode 100644 index 0000000000..119791f59d --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q14.sql @@ -0,0 +1,211 @@ +-- SQLBench-DS query 14 derived from TPC-DS query 14 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 1999 AND 1999 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 1999 AND 1999 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 1999 AND 1999 + 2) + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + avg_sales as + (select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2) x) + select channel, i_brand_id,i_class_id,i_category_id,sum(sales), sum(number_sales) + from( + select 'store' channel, i_brand_id,i_class_id + ,i_category_id,sum(ss_quantity*ss_list_price) sales + , count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select 
average_sales from avg_sales) + union all + select 'catalog' channel, i_brand_id,i_class_id,i_category_id, sum(cs_quantity*cs_list_price) sales, count(*) number_sales + from catalog_sales + ,item + ,date_dim + where cs_item_sk in (select ss_item_sk from cross_items) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(cs_quantity*cs_list_price) > (select average_sales from avg_sales) + union all + select 'web' channel, i_brand_id,i_class_id,i_category_id, sum(ws_quantity*ws_list_price) sales , count(*) number_sales + from web_sales + ,item + ,date_dim + where ws_item_sk in (select ss_item_sk from cross_items) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ws_quantity*ws_list_price) > (select average_sales from avg_sales) + ) y + group by rollup (channel, i_brand_id,i_class_id,i_category_id) + order by channel,i_brand_id,i_class_id,i_category_id + LIMIT 100; +with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 1999 AND 1999 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 1999 AND 1999 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 1999 AND 1999 + 2) x + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + avg_sales as 
+(select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2) x) + select this_year.channel ty_channel + ,this_year.i_brand_id ty_brand + ,this_year.i_class_id ty_class + ,this_year.i_category_id ty_category + ,this_year.sales ty_sales + ,this_year.number_sales ty_number_sales + ,last_year.channel ly_channel + ,last_year.i_brand_id ly_brand + ,last_year.i_class_id ly_class + ,last_year.i_category_id ly_category + ,last_year.sales ly_sales + ,last_year.number_sales ly_number_sales + from + (select 'store' channel, i_brand_id,i_class_id,i_category_id + ,sum(ss_quantity*ss_list_price) sales, count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_week_seq = (select d_week_seq + from date_dim + where d_year = 1999 + 1 + and d_moy = 12 + and d_dom = 14) + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) this_year, + (select 'store' channel, i_brand_id,i_class_id + ,i_category_id, sum(ss_quantity*ss_list_price) sales, count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_week_seq = (select d_week_seq + from date_dim + where d_year = 1999 + and d_moy = 12 + and d_dom = 14) + group by i_brand_id,i_class_id,i_category_id + having 
sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) last_year + where this_year.i_brand_id= last_year.i_brand_id + and this_year.i_class_id = last_year.i_class_id + and this_year.i_category_id = last_year.i_category_id + order by this_year.channel, this_year.i_brand_id, this_year.i_class_id, this_year.i_category_id + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q15.sql b/crates/benchmarks/queries/tpcds/q15.sql new file mode 100644 index 0000000000..bb1812a07c --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q15.sql @@ -0,0 +1,21 @@ +-- SQLBench-DS query 15 derived from TPC-DS query 15 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select ca_zip + ,sum(cs_sales_price) + from catalog_sales + ,customer + ,customer_address + ,date_dim + where cs_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', + '85392', '85460', '80348', '81792') + or ca_state in ('CA','WA','GA') + or cs_sales_price > 500) + and cs_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2002 + group by ca_zip + order by ca_zip + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q16.sql b/crates/benchmarks/queries/tpcds/q16.sql new file mode 100644 index 0000000000..2e0f9a9922 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q16.sql @@ -0,0 +1,32 @@ +-- SQLBench-DS query 16 derived from TPC-DS query 16 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select + count(distinct cs_order_number) as `order count` + ,sum(cs_ext_ship_cost) as `total shipping cost` + ,sum(cs_net_profit) as `total net profit` +from + catalog_sales cs1 + ,date_dim + ,customer_address + ,call_center +where + d_date between '1999-5-01' and + (cast('1999-5-01' as date) + INTERVAL '60 DAYS') +and cs1.cs_ship_date_sk = d_date_sk +and cs1.cs_ship_addr_sk = ca_address_sk +and ca_state = 'ID' +and cs1.cs_call_center_sk = cc_call_center_sk +and cc_county in ('Williamson County','Williamson County','Williamson County','Williamson County', + 'Williamson County' +) +and exists (select * + from catalog_sales cs2 + where cs1.cs_order_number = cs2.cs_order_number + and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) +and not exists(select * + from catalog_returns cr1 + where cs1.cs_order_number = cr1.cr_order_number) +order by count(distinct cs_order_number) + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q17.sql b/crates/benchmarks/queries/tpcds/q17.sql new file mode 100644 index 0000000000..9f9e97d76e --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q17.sql @@ -0,0 +1,46 @@ +-- SQLBench-DS query 17 derived from TPC-DS query 17 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select i_item_id + ,i_item_desc + ,s_state + ,count(ss_quantity) as store_sales_quantitycount + ,avg(ss_quantity) as store_sales_quantityave + ,stddev_samp(ss_quantity) as store_sales_quantitystdev + ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov + ,count(sr_return_quantity) as store_returns_quantitycount + ,avg(sr_return_quantity) as store_returns_quantityave + ,stddev_samp(sr_return_quantity) as store_returns_quantitystdev + ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov + ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave + ,stddev_samp(cs_quantity) as catalog_sales_quantitystdev + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov + from store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where d1.d_quarter_name = '1999Q1' + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_quarter_name in ('1999Q1','1999Q2','1999Q3') + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_quarter_name in ('1999Q1','1999Q2','1999Q3') + group by i_item_id + ,i_item_desc + ,s_state + order by i_item_id + ,i_item_desc + ,s_state + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q18.sql b/crates/benchmarks/queries/tpcds/q18.sql new file mode 100644 index 0000000000..50cc6c63f2 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q18.sql @@ -0,0 +1,35 @@ +-- SQLBench-DS query 18 derived from TPC-DS query 18 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select i_item_id, + ca_country, + ca_state, + ca_county, + avg( cast(cs_quantity as decimal(12,2))) agg1, + avg( cast(cs_list_price as decimal(12,2))) agg2, + avg( cast(cs_coupon_amt as decimal(12,2))) agg3, + avg( cast(cs_sales_price as decimal(12,2))) agg4, + avg( cast(cs_net_profit as decimal(12,2))) agg5, + avg( cast(c_birth_year as decimal(12,2))) agg6, + avg( cast(cd1.cd_dep_count as decimal(12,2))) agg7 + from catalog_sales, customer_demographics cd1, + customer_demographics cd2, customer, customer_address, date_dim, item + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd1.cd_demo_sk and + cs_bill_customer_sk = c_customer_sk and + cd1.cd_gender = 'M' and + cd1.cd_education_status = 'Primary' and + c_current_cdemo_sk = cd2.cd_demo_sk and + c_current_addr_sk = ca_address_sk and + c_birth_month in (1,2,9,5,11,3) and + d_year = 1998 and + ca_state in ('MS','NE','IA' + ,'MI','GA','NY','CO') + group by rollup (i_item_id, ca_country, ca_state, ca_county) + order by ca_country, + ca_state, + ca_county, + i_item_id + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q19.sql b/crates/benchmarks/queries/tpcds/q19.sql new file mode 100644 index 0000000000..bf54b3b802 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q19.sql @@ -0,0 +1,26 @@ +-- SQLBench-DS query 19 derived from TPC-DS query 19 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item,customer,customer_address,store + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=8 + and d_moy=11 + and d_year=1999 + and ss_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and substr(ca_zip,1,5) <> substr(s_zip,1,5) + and ss_store_sk = s_store_sk + group by i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact + order by ext_price desc + ,i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact + LIMIT 100 ; + diff --git a/crates/benchmarks/queries/tpcds/q2.sql b/crates/benchmarks/queries/tpcds/q2.sql new file mode 100644 index 0000000000..838717836b --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q2.sql @@ -0,0 +1,61 @@ +-- SQLBench-DS query 2 derived from TPC-DS query 2 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+with wscs as + (select sold_date_sk + ,sales_price + from (select ws_sold_date_sk sold_date_sk + ,ws_ext_sales_price sales_price + from web_sales + union all + select cs_sold_date_sk sold_date_sk + ,cs_ext_sales_price sales_price + from catalog_sales)), + wswscs as + (select d_week_seq, + sum(case when (d_day_name='Sunday') then sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then sales_price else null end) sat_sales + from wscs + ,date_dim + where d_date_sk = sold_date_sk + group by d_week_seq) + select d_week_seq1 + ,round(sun_sales1/sun_sales2,2) + ,round(mon_sales1/mon_sales2,2) + ,round(tue_sales1/tue_sales2,2) + ,round(wed_sales1/wed_sales2,2) + ,round(thu_sales1/thu_sales2,2) + ,round(fri_sales1/fri_sales2,2) + ,round(sat_sales1/sat_sales2,2) + from + (select wswscs.d_week_seq d_week_seq1 + ,sun_sales sun_sales1 + ,mon_sales mon_sales1 + ,tue_sales tue_sales1 + ,wed_sales wed_sales1 + ,thu_sales thu_sales1 + ,fri_sales fri_sales1 + ,sat_sales sat_sales1 + from wswscs,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 2000) y, + (select wswscs.d_week_seq d_week_seq2 + ,sun_sales sun_sales2 + ,mon_sales mon_sales2 + ,tue_sales tue_sales2 + ,wed_sales wed_sales2 + ,thu_sales thu_sales2 + ,fri_sales fri_sales2 + ,sat_sales sat_sales2 + from wswscs + ,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 2000+1) z + where d_week_seq1=d_week_seq2-53 + order by d_week_seq1; + diff --git a/crates/benchmarks/queries/tpcds/q20.sql b/crates/benchmarks/queries/tpcds/q20.sql new file mode 100644 
index 0000000000..ea4747317d --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q20.sql @@ -0,0 +1,31 @@ +-- SQLBench-DS query 20 derived from TPC-DS query 20 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(cs_ext_sales_price) as itemrevenue + ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over + (partition by i_class) as revenueratio + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and i_category in ('Children', 'Sports', 'Music') + and cs_sold_date_sk = d_date_sk + and d_date between cast('2002-04-01' as date) + and (cast('2002-04-01' as date) + INTERVAL '30 DAYS') + group by i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + order by i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q21.sql b/crates/benchmarks/queries/tpcds/q21.sql new file mode 100644 index 0000000000..d768fa1428 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q21.sql @@ -0,0 +1,31 @@ +-- SQLBench-DS query 21 derived from TPC-DS query 21 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select * + from(select w_warehouse_name + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('2000-05-19' as date)) + then inv_quantity_on_hand + else 0 end) as inv_before + ,sum(case when (cast(d_date as date) >= cast ('2000-05-19' as date)) + then inv_quantity_on_hand + else 0 end) as inv_after + from inventory + ,warehouse + ,item + ,date_dim + where i_current_price between 0.99 and 1.49 + and i_item_sk = inv_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_date between (cast ('2000-05-19' as date) - INTERVAL '30 DAYS') + and (cast ('2000-05-19' as date) + INTERVAL '30 DAYS') + group by w_warehouse_name, i_item_id) x + where (case when inv_before > 0 + then inv_after / inv_before + else null + end) between 2.0/3.0 and 3.0/2.0 + order by w_warehouse_name + ,i_item_id + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q22.sql b/crates/benchmarks/queries/tpcds/q22.sql new file mode 100644 index 0000000000..c7e1c78181 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q22.sql @@ -0,0 +1,21 @@ +-- SQLBench-DS query 22 derived from TPC-DS query 22 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select i_product_name + ,i_brand + ,i_class + ,i_category + ,avg(inv_quantity_on_hand) qoh + from inventory + ,date_dim + ,item + where inv_date_sk=d_date_sk + and inv_item_sk=i_item_sk + and d_month_seq between 1201 and 1201 + 11 + group by rollup(i_product_name + ,i_brand + ,i_class + ,i_category) +order by qoh, i_product_name, i_brand, i_class, i_category + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q23.sql b/crates/benchmarks/queries/tpcds/q23.sql new file mode 100644 index 0000000000..0dc7f73859 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q23.sql @@ -0,0 +1,108 @@ +-- SQLBench-DS query 23 derived from TPC-DS query 23 under the terms of the TPC Fair Use Policy. 
+-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +with frequent_ss_items as + (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by c_customer_sk)), + best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * +from + max_store_sales)) + select sum(sales) + from (select cs_quantity*cs_list_price sales + from catalog_sales + ,date_dim + where d_year = 2000 + and d_moy = 3 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer) + union all + select ws_quantity*ws_list_price sales + from web_sales + ,date_dim + where d_year = 2000 + and d_moy = 3 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer)) + LIMIT 100; +with frequent_ss_items as + (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (2000,2000 + 1,2000 + 2,2000 + 3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having 
count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by c_customer_sk)), + best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * + from max_store_sales)) + select c_last_name,c_first_name,sales + from (select c_last_name,c_first_name,sum(cs_quantity*cs_list_price) sales + from catalog_sales + ,customer + ,date_dim + where d_year = 2000 + and d_moy = 3 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer) + and cs_bill_customer_sk = c_customer_sk + group by c_last_name,c_first_name + union all + select c_last_name,c_first_name,sum(ws_quantity*ws_list_price) sales + from web_sales + ,customer + ,date_dim + where d_year = 2000 + and d_moy = 3 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer) + and ws_bill_customer_sk = c_customer_sk + group by c_last_name,c_first_name) + order by c_last_name,c_first_name,sales + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q24.sql b/crates/benchmarks/queries/tpcds/q24.sql new file mode 100644 index 0000000000..5d6d2f5053 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q24.sql @@ -0,0 +1,108 @@ +-- SQLBench-DS query 24 derived from TPC-DS query 24 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_net_profit) netpaid +from store_sales + ,store_returns + ,store + ,item + ,customer + ,customer_address +where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = c_customer_sk + and ss_item_sk = i_item_sk + and ss_store_sk = s_store_sk + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) + and s_zip = ca_zip +and s_market_id=10 +group by c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 'orchid' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +order by c_last_name + ,c_first_name + ,s_store_name +; +with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_net_profit) netpaid +from store_sales + ,store_returns + ,store + ,item + ,customer + ,customer_address +where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = c_customer_sk + and ss_item_sk = i_item_sk + and ss_store_sk = s_store_sk + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) + and s_zip = ca_zip + and s_market_id = 10 +group by c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 'green' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +order by c_last_name + ,c_first_name + ,s_store_name +; 
+ diff --git a/crates/benchmarks/queries/tpcds/q25.sql b/crates/benchmarks/queries/tpcds/q25.sql new file mode 100644 index 0000000000..b0af0e61dd --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q25.sql @@ -0,0 +1,49 @@ +-- SQLBench-DS query 25 derived from TPC-DS query 25 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,min(ss_net_profit) as store_sales_profit + ,min(sr_net_loss) as store_returns_loss + ,min(cs_net_profit) as catalog_sales_profit + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 2002 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 10 + and d2.d_year = 2002 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_moy between 4 and 10 + and d3.d_year = 2002 + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q26.sql b/crates/benchmarks/queries/tpcds/q26.sql new file mode 100644 index 0000000000..55ccc8b511 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q26.sql @@ -0,0 +1,22 @@ +-- SQLBench-DS query 26 derived from TPC-DS query 26 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select i_item_id, + avg(cs_quantity) agg1, + avg(cs_list_price) agg2, + avg(cs_coupon_amt) agg3, + avg(cs_sales_price) agg4 + from catalog_sales, customer_demographics, date_dim, item, promotion + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd_demo_sk and + cs_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'M' and + cd_education_status = '4 yr Degree' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 2000 + group by i_item_id + order by i_item_id + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q27.sql b/crates/benchmarks/queries/tpcds/q27.sql new file mode 100644 index 0000000000..6d28e4e663 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q27.sql @@ -0,0 +1,24 @@ +-- SQLBench-DS query 27 derived from TPC-DS query 27 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + cd_marital_status = 'U' and + cd_education_status = 'Secondary' and + d_year = 2000 and + s_state in ('TN','TN', 'TN', 'TN', 'TN', 'TN') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q28.sql b/crates/benchmarks/queries/tpcds/q28.sql new file mode 100644 index 0000000000..6efa7d7d77 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q28.sql @@ -0,0 +1,54 @@ +-- SQLBench-DS query 28 derived from TPC-DS query 28 under the terms of the TPC Fair Use Policy. 
+-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select * +from (select avg(ss_list_price) B1_LP + ,count(ss_list_price) B1_CNT + ,count(distinct ss_list_price) B1_CNTD + from store_sales + where ss_quantity between 0 and 5 + and (ss_list_price between 28 and 28+10 + or ss_coupon_amt between 12573 and 12573+1000 + or ss_wholesale_cost between 33 and 33+20)) B1, + (select avg(ss_list_price) B2_LP + ,count(ss_list_price) B2_CNT + ,count(distinct ss_list_price) B2_CNTD + from store_sales + where ss_quantity between 6 and 10 + and (ss_list_price between 143 and 143+10 + or ss_coupon_amt between 5562 and 5562+1000 + or ss_wholesale_cost between 45 and 45+20)) B2, + (select avg(ss_list_price) B3_LP + ,count(ss_list_price) B3_CNT + ,count(distinct ss_list_price) B3_CNTD + from store_sales + where ss_quantity between 11 and 15 + and (ss_list_price between 159 and 159+10 + or ss_coupon_amt between 2807 and 2807+1000 + or ss_wholesale_cost between 24 and 24+20)) B3, + (select avg(ss_list_price) B4_LP + ,count(ss_list_price) B4_CNT + ,count(distinct ss_list_price) B4_CNTD + from store_sales + where ss_quantity between 16 and 20 + and (ss_list_price between 24 and 24+10 + or ss_coupon_amt between 3706 and 3706+1000 + or ss_wholesale_cost between 46 and 46+20)) B4, + (select avg(ss_list_price) B5_LP + ,count(ss_list_price) B5_CNT + ,count(distinct ss_list_price) B5_CNTD + from store_sales + where ss_quantity between 21 and 25 + and (ss_list_price between 76 and 76+10 + or ss_coupon_amt between 2096 and 2096+1000 + or ss_wholesale_cost between 50 and 50+20)) B5, + (select avg(ss_list_price) B6_LP + ,count(ss_list_price) B6_CNT + ,count(distinct ss_list_price) B6_CNTD + from store_sales + where ss_quantity between 26 and 30 + and (ss_list_price between 169 and 169+10 + or ss_coupon_amt between 10672 and 10672+1000 + or ss_wholesale_cost between 58 and 58+20)) B6 + LIMIT 100; + diff --git 
a/crates/benchmarks/queries/tpcds/q29.sql b/crates/benchmarks/queries/tpcds/q29.sql new file mode 100644 index 0000000000..8d463f3771 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q29.sql @@ -0,0 +1,48 @@ +-- SQLBench-DS query 29 derived from TPC-DS query 29 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,stddev_samp(ss_quantity) as store_sales_quantity + ,stddev_samp(sr_return_quantity) as store_returns_quantity + ,stddev_samp(cs_quantity) as catalog_sales_quantity + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 1999 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 4 + 3 + and d2.d_year = 1999 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_year in (1999,1999+1,1999+2) + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q3.sql b/crates/benchmarks/queries/tpcds/q3.sql new file mode 100644 index 0000000000..d6a55cb8cf --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q3.sql @@ -0,0 +1,22 @@ +-- SQLBench-DS query 3 derived from TPC-DS query 3 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_net_profit) sum_agg + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manufact_id = 445 + and dt.d_moy=12 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,sum_agg desc + ,brand_id + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q30.sql b/crates/benchmarks/queries/tpcds/q30.sql new file mode 100644 index 0000000000..7004078a50 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q30.sql @@ -0,0 +1,32 @@ +-- SQLBench-DS query 30 derived from TPC-DS query 30 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +with customer_total_return as + (select wr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(wr_return_amt) as ctr_total_return + from web_returns + ,date_dim + ,customer_address + where wr_returned_date_sk = d_date_sk + and d_year =2000 + and wr_returning_addr_sk = ca_address_sk + group by wr_returning_customer_sk + ,ca_state) + select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date_sk,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'KS' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date_sk,ctr_total_return + LIMIT 100; + diff --git 
a/crates/benchmarks/queries/tpcds/q31.sql b/crates/benchmarks/queries/tpcds/q31.sql new file mode 100644 index 0000000000..89aba18998 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q31.sql @@ -0,0 +1,53 @@ +-- SQLBench-DS query 31 derived from TPC-DS query 31 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +with ss as + (select ca_county,d_qoy, d_year,sum(ss_ext_sales_price) as store_sales + from store_sales,date_dim,customer_address + where ss_sold_date_sk = d_date_sk + and ss_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year), + ws as + (select ca_county,d_qoy, d_year,sum(ws_ext_sales_price) as web_sales + from web_sales,date_dim,customer_address + where ws_sold_date_sk = d_date_sk + and ws_bill_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year) + select + ss1.ca_county + ,ss1.d_year + ,ws2.web_sales/ws1.web_sales web_q1_q2_increase + ,ss2.store_sales/ss1.store_sales store_q1_q2_increase + ,ws3.web_sales/ws2.web_sales web_q2_q3_increase + ,ss3.store_sales/ss2.store_sales store_q2_q3_increase + from + ss ss1 + ,ss ss2 + ,ss ss3 + ,ws ws1 + ,ws ws2 + ,ws ws3 + where + ss1.d_qoy = 1 + and ss1.d_year = 1999 + and ss1.ca_county = ss2.ca_county + and ss2.d_qoy = 2 + and ss2.d_year = 1999 + and ss2.ca_county = ss3.ca_county + and ss3.d_qoy = 3 + and ss3.d_year = 1999 + and ss1.ca_county = ws1.ca_county + and ws1.d_qoy = 1 + and ws1.d_year = 1999 + and ws1.ca_county = ws2.ca_county + and ws2.d_qoy = 2 + and ws2.d_year = 1999 + and ws1.ca_county = ws3.ca_county + and ws3.d_qoy = 3 + and ws3.d_year =1999 + and case when ws1.web_sales > 0 then ws2.web_sales/ws1.web_sales else null end + > case when ss1.store_sales > 0 then ss2.store_sales/ss1.store_sales else null end + and case when ws2.web_sales > 0 then ws3.web_sales/ws2.web_sales else null end + > case when ss2.store_sales > 0 then ss3.store_sales/ss2.store_sales else null end 
+ order by ss1.ca_county; + diff --git a/crates/benchmarks/queries/tpcds/q32.sql b/crates/benchmarks/queries/tpcds/q32.sql new file mode 100644 index 0000000000..419dcd0b05 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q32.sql @@ -0,0 +1,29 @@ +-- SQLBench-DS query 32 derived from TPC-DS query 32 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select sum(cs_ext_discount_amt) as `excess discount amount` +from + catalog_sales + ,item + ,date_dim +where +i_manufact_id = 283 +and i_item_sk = cs_item_sk +and d_date between '1999-02-22' and + (cast('1999-02-22' as date) + INTERVAL '90 DAYS') +and d_date_sk = cs_sold_date_sk +and cs_ext_discount_amt + > ( + select + 1.3 * avg(cs_ext_discount_amt) + from + catalog_sales + ,date_dim + where + cs_item_sk = i_item_sk + and d_date between '1999-02-22' and + (cast('1999-02-22' as date) + INTERVAL '90 DAYS') + and d_date_sk = cs_sold_date_sk + ) + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q33.sql b/crates/benchmarks/queries/tpcds/q33.sql new file mode 100644 index 0000000000..1aabc472b7 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q33.sql @@ -0,0 +1,76 @@ +-- SQLBench-DS query 33 derived from TPC-DS query 33 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+with ss as ( + select + i_manufact_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 4 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_manufact_id), + cs as ( + select + i_manufact_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 4 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_manufact_id), + ws as ( + select + i_manufact_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 4 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_manufact_id) + select i_manufact_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_manufact_id + order by total_sales + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q34.sql b/crates/benchmarks/queries/tpcds/q34.sql new file mode 100644 index 0000000000..f61caa51a3 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q34.sql @@ -0,0 +1,32 @@ +-- SQLBench-DS query 34 derived from TPC-DS query 34 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28) + and (household_demographics.hd_buy_potential = '501-1000' or + household_demographics.hd_buy_potential = 'Unknown') + and household_demographics.hd_vehicle_count > 0 + and (case when household_demographics.hd_vehicle_count > 0 + then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count + else null + end) > 1.2 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_county in ('Williamson County','Williamson County','Williamson County','Williamson County', + 'Williamson County','Williamson County','Williamson County','Williamson County') + group by ss_ticket_number,ss_customer_sk) dn,customer + where ss_customer_sk = c_customer_sk + and cnt between 15 and 20 + order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc, ss_ticket_number; + diff --git a/crates/benchmarks/queries/tpcds/q35.sql b/crates/benchmarks/queries/tpcds/q35.sql new file mode 100644 index 0000000000..eb41c40381 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q35.sql @@ -0,0 +1,59 @@ +-- SQLBench-DS query 35 derived from TPC-DS query 35 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select + ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + count(*) cnt1, + min(cd_dep_count), + max(cd_dep_count), + avg(cd_dep_count), + cd_dep_employed_count, + count(*) cnt2, + min(cd_dep_employed_count), + max(cd_dep_employed_count), + avg(cd_dep_employed_count), + cd_dep_college_count, + count(*) cnt3, + min(cd_dep_college_count), + max(cd_dep_college_count), + avg(cd_dep_college_count) + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2000 and + d_qoy < 4) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2000 and + d_qoy < 4) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2000 and + d_qoy < 4)) + group by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q36.sql b/crates/benchmarks/queries/tpcds/q36.sql new file mode 100644 index 0000000000..889fff5d14 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q36.sql @@ -0,0 +1,31 @@ +-- SQLBench-DS query 36 derived from TPC-DS query 36 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select + sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,item + ,store + where + d1.d_year = 2001 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and s_state in ('TN','TN','TN','TN', + 'TN','TN','TN','TN') + group by rollup(i_category,i_class) + order by + lochierarchy desc + ,case when lochierarchy = 0 then i_category end + ,rank_within_parent + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q37.sql b/crates/benchmarks/queries/tpcds/q37.sql new file mode 100644 index 0000000000..bdd12dc82e --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q37.sql @@ -0,0 +1,18 @@ +-- SQLBench-DS query 37 derived from TPC-DS query 37 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, catalog_sales + where i_current_price between 26 and 26 + 30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2001-06-09' as date) and (cast('2001-06-09' as date) + INTERVAL '60 DAYS') + and i_manufact_id in (744,884,722,693) + and inv_quantity_on_hand between 100 and 500 + and cs_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q38.sql b/crates/benchmarks/queries/tpcds/q38.sql new file mode 100644 index 0000000000..03e4e07635 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q38.sql @@ -0,0 +1,24 @@ +-- SQLBench-DS query 38 derived from TPC-DS query 38 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select count(*) from ( + select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1190 and 1190 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1190 and 1190 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1190 and 1190 + 11 +) hot_cust + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q39.sql b/crates/benchmarks/queries/tpcds/q39.sql new file mode 100644 index 0000000000..f49c223eba --- /dev/null +++ 
b/crates/benchmarks/queries/tpcds/q39.sql @@ -0,0 +1,55 @@ +-- SQLBench-DS query 39 derived from TPC-DS query 39 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =2001 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=1 + and inv2.d_moy=1+1 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov +; +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =2001 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where 
inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=1 + and inv2.d_moy=1+1 + and inv1.cov > 1.5 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov +; + diff --git a/crates/benchmarks/queries/tpcds/q4.sql b/crates/benchmarks/queries/tpcds/q4.sql new file mode 100644 index 0000000000..08643201a5 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q4.sql @@ -0,0 +1,117 @@ +-- SQLBench-DS query 4 derived from TPC-DS query 4 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((cs_ext_list_price-cs_ext_wholesale_cost-cs_ext_discount_amt)+cs_ext_sales_price)/2) ) year_total + ,'c' sale_type + from customer + ,catalog_sales + ,date_dim + where c_customer_sk = cs_bill_customer_sk + and cs_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + 
,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year +union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2) ) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_email_address + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_c_firstyear + ,year_total t_c_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_c_secyear.customer_id + and t_s_firstyear.customer_id = t_c_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_c_firstyear.sale_type = 'c' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_c_secyear.sale_type = 'c' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_c_firstyear.dyear = 2001 + and t_c_secyear.dyear = 2001+1 + and t_w_firstyear.dyear = 2001 + and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + and t_c_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > 
case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_email_address + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q40.sql b/crates/benchmarks/queries/tpcds/q40.sql new file mode 100644 index 0000000000..7f54a9bbdf --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q40.sql @@ -0,0 +1,29 @@ +-- SQLBench-DS query 40 derived from TPC-DS query 40 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select + w_state + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('2002-05-18' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before + ,sum(case when (cast(d_date as date) >= cast ('2002-05-18' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after + from + catalog_sales left outer join catalog_returns on + (cs_order_number = cr_order_number + and cs_item_sk = cr_item_sk) + ,warehouse + ,item + ,date_dim + where + i_current_price between 0.99 and 1.49 + and i_item_sk = cs_item_sk + and cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and d_date between (cast ('2002-05-18' as date) - INTERVAL '30 DAYS') + and (cast ('2002-05-18' as date) + INTERVAL '30 DAYS') + group by + w_state,i_item_id + order by w_state,i_item_id + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q41.sql b/crates/benchmarks/queries/tpcds/q41.sql new file mode 100644 index 0000000000..d561cdba50 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q41.sql @@ -0,0 
+1,53 @@ +-- SQLBench-DS query 41 derived from TPC-DS query 41 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select distinct(i_product_name) + from item i1 + where i_manufact_id between 668 and 668+40 + and (select count(*) as item_cnt + from item + where (i_manufact = i1.i_manufact and + ((i_category = 'Women' and + (i_color = 'cream' or i_color = 'ghost') and + (i_units = 'Ton' or i_units = 'Gross') and + (i_size = 'economy' or i_size = 'small') + ) or + (i_category = 'Women' and + (i_color = 'midnight' or i_color = 'burlywood') and + (i_units = 'Tsp' or i_units = 'Bundle') and + (i_size = 'medium' or i_size = 'extra large') + ) or + (i_category = 'Men' and + (i_color = 'lavender' or i_color = 'azure') and + (i_units = 'Each' or i_units = 'Lb') and + (i_size = 'large' or i_size = 'N/A') + ) or + (i_category = 'Men' and + (i_color = 'chocolate' or i_color = 'steel') and + (i_units = 'N/A' or i_units = 'Dozen') and + (i_size = 'economy' or i_size = 'small') + ))) or + (i_manufact = i1.i_manufact and + ((i_category = 'Women' and + (i_color = 'floral' or i_color = 'royal') and + (i_units = 'Unknown' or i_units = 'Tbl') and + (i_size = 'economy' or i_size = 'small') + ) or + (i_category = 'Women' and + (i_color = 'navy' or i_color = 'forest') and + (i_units = 'Bunch' or i_units = 'Dram') and + (i_size = 'medium' or i_size = 'extra large') + ) or + (i_category = 'Men' and + (i_color = 'cyan' or i_color = 'indian') and + (i_units = 'Carton' or i_units = 'Cup') and + (i_size = 'large' or i_size = 'N/A') + ) or + (i_category = 'Men' and + (i_color = 'coral' or i_color = 'pale') and + (i_units = 'Pallet' or i_units = 'Gram') and + (i_size = 'economy' or i_size = 'small') + )))) > 0 + order by i_product_name + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q42.sql b/crates/benchmarks/queries/tpcds/q42.sql new file mode 100644 index 
0000000000..ac91e7cc2b --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q42.sql @@ -0,0 +1,23 @@ +-- SQLBench-DS query 42 derived from TPC-DS query 42 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select dt.d_year + ,item.i_category_id + ,item.i_category + ,sum(ss_ext_sales_price) + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=11 + and dt.d_year=1998 + group by dt.d_year + ,item.i_category_id + ,item.i_category + order by sum(ss_ext_sales_price) desc,dt.d_year + ,item.i_category_id + ,item.i_category + LIMIT 100 ; + diff --git a/crates/benchmarks/queries/tpcds/q43.sql b/crates/benchmarks/queries/tpcds/q43.sql new file mode 100644 index 0000000000..ca09e8e77d --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q43.sql @@ -0,0 +1,20 @@ +-- SQLBench-DS query 43 derived from TPC-DS query 43 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select s_store_name, s_store_id, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from date_dim, store_sales, store + where d_date_sk = ss_sold_date_sk and + s_store_sk = ss_store_sk and + s_gmt_offset = -5 and + d_year = 2000 + group by s_store_name, s_store_id + order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q44.sql b/crates/benchmarks/queries/tpcds/q44.sql new file mode 100644 index 0000000000..8c635cef49 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q44.sql @@ -0,0 +1,36 @@ +-- SQLBench-DS query 44 derived from TPC-DS query 44 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing +from(select * + from (select item_sk,rank() over (order by rank_col asc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 6 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 6 + and ss_hdemo_sk is null + group by ss_store_sk))V1)V11 + where rnk < 11) asceding, + (select * + from (select item_sk,rank() over (order by rank_col desc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 6 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 6 + and ss_hdemo_sk is null + group by ss_store_sk))V2)V21 + where rnk < 11) descending, +item i1, +item i2 +where asceding.rnk = descending.rnk + and i1.i_item_sk=asceding.item_sk + and i2.i_item_sk=descending.item_sk +order by asceding.rnk + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q45.sql b/crates/benchmarks/queries/tpcds/q45.sql new file mode 100644 index 0000000000..682cc9b54d --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q45.sql @@ -0,0 +1,21 @@ +-- SQLBench-DS query 45 derived from TPC-DS query 45 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select ca_zip, ca_city, sum(ws_sales_price) + from web_sales, customer, customer_address, date_dim, item + where ws_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ws_item_sk = i_item_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') + or + i_item_id in (select i_item_id + from item + where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) + ) + ) + and ws_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2000 + group by ca_zip, ca_city + order by ca_zip, ca_city + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q46.sql b/crates/benchmarks/queries/tpcds/q46.sql new file mode 100644 index 0000000000..81ae1d5815 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q46.sql @@ -0,0 +1,36 @@ +-- SQLBench-DS query 46 derived from TPC-DS query 46 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and (household_demographics.hd_dep_count = 3 or + household_demographics.hd_vehicle_count= 1) + and date_dim.d_dow in (6,0) + and date_dim.d_year in (1999,1999+1,1999+2) + and store.s_city in ('Midway','Fairview','Fairview','Midway','Fairview') + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn,customer,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q47.sql b/crates/benchmarks/queries/tpcds/q47.sql new file mode 100644 index 0000000000..f741fe44cd --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q47.sql @@ -0,0 +1,52 @@ +-- SQLBench-DS query 47 derived from TPC-DS query 47 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+with v1 as( + select i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, + s_store_name, s_company_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + s_store_name, s_company_name + order by d_year, d_moy) rn + from item, store_sales, date_dim, store + where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + ( + d_year = 2001 or + ( d_year = 2001-1 and d_moy =12) or + ( d_year = 2001+1 and d_moy =1) + ) + group by i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy), + v2 as( + select v1.i_category, v1.i_brand, v1.s_store_name, v1.s_company_name + ,v1.d_year + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1.s_store_name = v1_lag.s_store_name and + v1.s_store_name = v1_lead.s_store_name and + v1.s_company_name = v1_lag.s_company_name and + v1.s_company_name = v1_lead.s_company_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2001 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, nsum + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q48.sql b/crates/benchmarks/queries/tpcds/q48.sql new file mode 100644 index 0000000000..fb83279b13 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q48.sql @@ -0,0 +1,68 @@ +-- SQLBench-DS query 48 derived from TPC-DS query 48 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select sum (ss_quantity) + from store_sales, store, customer_demographics, customer_address, date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2001 + and + ( + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'W' + and + cd_education_status = '2 yr Degree' + and + ss_sales_price between 100.00 and 150.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'S' + and + cd_education_status = 'Advanced Degree' + and + ss_sales_price between 50.00 and 100.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'D' + and + cd_education_status = 'Primary' + and + ss_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('IL', 'KY', 'OR') + and ss_net_profit between 0 and 2000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('VA', 'FL', 'AL') + and ss_net_profit between 150 and 3000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('OK', 'IA', 'TX') + and ss_net_profit between 50 and 25000 + ) + ) +; + diff --git a/crates/benchmarks/queries/tpcds/q49.sql b/crates/benchmarks/queries/tpcds/q49.sql new file mode 100644 index 0000000000..c97286528b --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q49.sql @@ -0,0 +1,130 @@ +-- SQLBench-DS query 49 derived from TPC-DS query 49 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select channel, item, return_ratio, return_rank, currency_rank from + (select + 'web' as channel + ,web.item + ,web.return_ratio + ,web.return_rank + ,web.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select ws.ws_item_sk as item + ,(cast(sum(coalesce(wr.wr_return_quantity,0)) as decimal(15,4))/ + cast(sum(coalesce(ws.ws_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(wr.wr_return_amt,0)) as decimal(15,4))/ + cast(sum(coalesce(ws.ws_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + web_sales ws left outer join web_returns wr + on (ws.ws_order_number = wr.wr_order_number and + ws.ws_item_sk = wr.wr_item_sk) + ,date_dim + where + wr.wr_return_amt > 10000 + and ws.ws_net_profit > 1 + and ws.ws_net_paid > 0 + and ws.ws_quantity > 0 + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by ws.ws_item_sk + ) in_web + ) web + where + ( + web.return_rank <= 10 + or + web.currency_rank <= 10 + ) + union + select + 'catalog' as channel + ,catalog.item + ,catalog.return_ratio + ,catalog.return_rank + ,catalog.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select + cs.cs_item_sk as item + ,(cast(sum(coalesce(cr.cr_return_quantity,0)) as decimal(15,4))/ + cast(sum(coalesce(cs.cs_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(cr.cr_return_amount,0)) as decimal(15,4))/ + cast(sum(coalesce(cs.cs_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + catalog_sales cs left outer join catalog_returns cr + on (cs.cs_order_number = cr.cr_order_number and + cs.cs_item_sk = cr.cr_item_sk) + ,date_dim + where + cr.cr_return_amount > 10000 + and cs.cs_net_profit > 1 + and cs.cs_net_paid > 0 + and cs.cs_quantity 
> 0 + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by cs.cs_item_sk + ) in_cat + ) catalog + where + ( + catalog.return_rank <= 10 + or + catalog.currency_rank <=10 + ) + union + select + 'store' as channel + ,store.item + ,store.return_ratio + ,store.return_rank + ,store.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select sts.ss_item_sk as item + ,(cast(sum(coalesce(sr.sr_return_quantity,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(sr.sr_return_amt,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + store_sales sts left outer join store_returns sr + on (sts.ss_ticket_number = sr.sr_ticket_number and sts.ss_item_sk = sr.sr_item_sk) + ,date_dim + where + sr.sr_return_amt > 10000 + and sts.ss_net_profit > 1 + and sts.ss_net_paid > 0 + and sts.ss_quantity > 0 + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by sts.ss_item_sk + ) in_store + ) store + where ( + store.return_rank <= 10 + or + store.currency_rank <= 10 + ) + ) + order by 1,4,5,2 + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q5.sql b/crates/benchmarks/queries/tpcds/q5.sql new file mode 100644 index 0000000000..4f2721634c --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q5.sql @@ -0,0 +1,129 @@ +-- SQLBench-DS query 5 derived from TPC-DS query 5 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+with ssr as + (select s_store_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ss_store_sk as store_sk, + ss_sold_date_sk as date_sk, + ss_ext_sales_price as sales_price, + ss_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from store_sales + union all + select sr_store_sk as store_sk, + sr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + sr_return_amt as return_amt, + sr_net_loss as net_loss + from store_returns + ) salesreturns, + date_dim, + store + where date_sk = d_date_sk + and d_date between cast('2001-08-04' as date) + and (cast('2001-08-04' as date) + INTERVAL '14 DAYS') + and store_sk = s_store_sk + group by s_store_id) + , + csr as + (select cp_catalog_page_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select cs_catalog_page_sk as page_sk, + cs_sold_date_sk as date_sk, + cs_ext_sales_price as sales_price, + cs_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from catalog_sales + union all + select cr_catalog_page_sk as page_sk, + cr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + cr_return_amount as return_amt, + cr_net_loss as net_loss + from catalog_returns + ) salesreturns, + date_dim, + catalog_page + where date_sk = d_date_sk + and d_date between cast('2001-08-04' as date) + and (cast('2001-08-04' as date) + INTERVAL '14 DAYS') + and page_sk = cp_catalog_page_sk + group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ws_web_site_sk as wsr_web_site_sk, + ws_sold_date_sk as date_sk, + ws_ext_sales_price as 
sales_price, + ws_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from web_sales + union all + select ws_web_site_sk as wsr_web_site_sk, + wr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + wr_return_amt as return_amt, + wr_net_loss as net_loss + from web_returns left outer join web_sales on + ( wr_item_sk = ws_item_sk + and wr_order_number = ws_order_number) + ) salesreturns, + date_dim, + web_site + where date_sk = d_date_sk + and d_date between cast('2001-08-04' as date) + and (cast('2001-08-04' as date) + INTERVAL '14 DAYS') + and wsr_web_site_sk = web_site_sk + group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || s_store_id as id + , sales + , returns + , (profit - profit_loss) as profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || cp_catalog_page_id as id + , sales + , returns + , (profit - profit_loss) as profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , (profit - profit_loss) as profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q50.sql b/crates/benchmarks/queries/tpcds/q50.sql new file mode 100644 index 0000000000..d3dd26a156 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q50.sql @@ -0,0 +1,60 @@ +-- SQLBench-DS query 50 derived from TPC-DS query 50 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and + (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and + (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and + (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + store_sales + ,store_returns + ,store + ,date_dim d1 + ,date_dim d2 +where + d2.d_year = 2002 +and d2.d_moy = 8 +and ss_ticket_number = sr_ticket_number +and ss_item_sk = sr_item_sk +and ss_sold_date_sk = d1.d_date_sk +and sr_returned_date_sk = d2.d_date_sk +and ss_customer_sk = sr_customer_sk +and ss_store_sk = s_store_sk +group by + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +order by s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q51.sql b/crates/benchmarks/queries/tpcds/q51.sql new file mode 100644 index 0000000000..5aeb3087b4 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q51.sql @@ -0,0 +1,46 @@ +-- SQLBench-DS query 51 derived from TPC-DS query 51 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+WITH web_v1 as ( +select + ws_item_sk item_sk, d_date, + sum(sum(ws_sales_price)) + over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from web_sales + ,date_dim +where ws_sold_date_sk=d_date_sk + and d_month_seq between 1215 and 1215+11 + and ws_item_sk is not NULL +group by ws_item_sk, d_date), +store_v1 as ( +select + ss_item_sk item_sk, d_date, + sum(sum(ss_sales_price)) + over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from store_sales + ,date_dim +where ss_sold_date_sk=d_date_sk + and d_month_seq between 1215 and 1215+11 + and ss_item_sk is not NULL +group by ss_item_sk, d_date) + select * +from (select item_sk + ,d_date + ,web_sales + ,store_sales + ,max(web_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative + ,max(store_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) store_cumulative + from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk + ,case when web.d_date is not null then web.d_date else store.d_date end d_date + ,web.cume_sales web_sales + ,store.cume_sales store_sales + from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk + and web.d_date = store.d_date) + )x )y +where web_cumulative > store_cumulative +order by item_sk + ,d_date + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q52.sql b/crates/benchmarks/queries/tpcds/q52.sql new file mode 100644 index 0000000000..b4d032baec --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q52.sql @@ -0,0 +1,23 @@ +-- SQLBench-DS query 52 derived from TPC-DS query 52 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_ext_sales_price) ext_price + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=11 + and dt.d_year=2000 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,ext_price desc + ,brand_id + LIMIT 100 ; + diff --git a/crates/benchmarks/queries/tpcds/q53.sql b/crates/benchmarks/queries/tpcds/q53.sql new file mode 100644 index 0000000000..4c87797741 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q53.sql @@ -0,0 +1,29 @@ +-- SQLBench-DS query 53 derived from TPC-DS query 53 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select * from +(select i_manufact_id, +sum(ss_sales_price) sum_sales, +avg(sum(ss_sales_price)) over (partition by i_manufact_id) avg_quarterly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and +ss_sold_date_sk = d_date_sk and +ss_store_sk = s_store_sk and +d_month_seq in (1197,1197+1,1197+2,1197+3,1197+4,1197+5,1197+6,1197+7,1197+8,1197+9,1197+10,1197+11) and +((i_category in ('Books','Children','Electronics') and +i_class in ('personal','portable','reference','self-help') and +i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) +or(i_category in ('Women','Music','Men') and +i_class in ('accessories','classical','fragrances','pants') and +i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manufact_id, d_qoy ) tmp1 +where case when avg_quarterly_sales > 0 + then abs (sum_sales - avg_quarterly_sales)/ avg_quarterly_sales + else null end > 0.1 +order by avg_quarterly_sales, + sum_sales, + i_manufact_id + LIMIT 100; + diff --git 
a/crates/benchmarks/queries/tpcds/q54.sql b/crates/benchmarks/queries/tpcds/q54.sql new file mode 100644 index 0000000000..4b382e1abe --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q54.sql @@ -0,0 +1,57 @@ +-- SQLBench-DS query 54 derived from TPC-DS query 54 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +with my_customers as ( + select distinct c_customer_sk + , c_current_addr_sk + from + ( select cs_sold_date_sk sold_date_sk, + cs_bill_customer_sk customer_sk, + cs_item_sk item_sk + from catalog_sales + union all + select ws_sold_date_sk sold_date_sk, + ws_bill_customer_sk customer_sk, + ws_item_sk item_sk + from web_sales + ) cs_or_ws_sales, + item, + date_dim, + customer + where sold_date_sk = d_date_sk + and item_sk = i_item_sk + and i_category = 'Men' + and i_class = 'shirts' + and c_customer_sk = cs_or_ws_sales.customer_sk + and d_moy = 4 + and d_year = 1998 + ) + , my_revenue as ( + select c_customer_sk, + sum(ss_ext_sales_price) as revenue + from my_customers, + store_sales, + customer_address, + store, + date_dim + where c_current_addr_sk = ca_address_sk + and ca_county = s_county + and ca_state = s_state + and ss_sold_date_sk = d_date_sk + and c_customer_sk = ss_customer_sk + and d_month_seq between (select distinct d_month_seq+1 + from date_dim where d_year = 1998 and d_moy = 4) + and (select distinct d_month_seq+3 + from date_dim where d_year = 1998 and d_moy = 4) + group by c_customer_sk + ) + , segments as + (select cast((revenue/50) as int) as segment + from my_revenue + ) + select segment, count(*) as num_customers, segment*50 as segment_base + from segments + group by segment + order by segment, num_customers + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q55.sql b/crates/benchmarks/queries/tpcds/q55.sql new file mode 100644 index 0000000000..5dabcab05f --- /dev/null +++ 
b/crates/benchmarks/queries/tpcds/q55.sql @@ -0,0 +1,15 @@ +-- SQLBench-DS query 55 derived from TPC-DS query 55 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select i_brand_id brand_id, i_brand brand, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=20 + and d_moy=12 + and d_year=1998 + group by i_brand, i_brand_id + order by ext_price desc, i_brand_id + LIMIT 100 ; + diff --git a/crates/benchmarks/queries/tpcds/q56.sql b/crates/benchmarks/queries/tpcds/q56.sql new file mode 100644 index 0000000000..d877d0b8b9 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q56.sql @@ -0,0 +1,70 @@ +-- SQLBench-DS query 56 derived from TPC-DS query 56 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+with ss as ( + select i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where i_item_id in (select + i_item_id +from item +where i_color in ('powder','goldenrod','bisque')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 5 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id), + cs as ( + select i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('powder','goldenrod','bisque')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 5 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id), + ws as ( + select i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('powder','goldenrod','bisque')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 5 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id) + select i_item_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by total_sales, + i_item_id + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q57.sql b/crates/benchmarks/queries/tpcds/q57.sql new file mode 100644 index 0000000000..088ddc9eeb --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q57.sql @@ -0,0 +1,49 @@ +-- SQLBench-DS query 57 derived from TPC-DS query 57 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+with v1 as( + select i_category, i_brand, + cc_name, + d_year, d_moy, + sum(cs_sales_price) sum_sales, + avg(sum(cs_sales_price)) over + (partition by i_category, i_brand, + cc_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + cc_name + order by d_year, d_moy) rn + from item, catalog_sales, date_dim, call_center + where cs_item_sk = i_item_sk and + cs_sold_date_sk = d_date_sk and + cc_call_center_sk= cs_call_center_sk and + ( + d_year = 2000 or + ( d_year = 2000-1 and d_moy =12) or + ( d_year = 2000+1 and d_moy =1) + ) + group by i_category, i_brand, + cc_name , d_year, d_moy), + v2 as( + select v1.cc_name + ,v1.d_year, v1.d_moy + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1. cc_name = v1_lag. cc_name and + v1. cc_name = v1_lead. cc_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2000 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, psum + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q58.sql b/crates/benchmarks/queries/tpcds/q58.sql new file mode 100644 index 0000000000..05801ea4b3 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q58.sql @@ -0,0 +1,66 @@ +-- SQLBench-DS query 58 derived from TPC-DS query 58 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+with ss_items as + (select i_item_id item_id + ,sum(ss_ext_sales_price) ss_item_rev + from store_sales + ,item + ,date_dim + where ss_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '2000-02-12')) + and ss_sold_date_sk = d_date_sk + group by i_item_id), + cs_items as + (select i_item_id item_id + ,sum(cs_ext_sales_price) cs_item_rev + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '2000-02-12')) + and cs_sold_date_sk = d_date_sk + group by i_item_id), + ws_items as + (select i_item_id item_id + ,sum(ws_ext_sales_price) ws_item_rev + from web_sales + ,item + ,date_dim + where ws_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq =(select d_week_seq + from date_dim + where d_date = '2000-02-12')) + and ws_sold_date_sk = d_date_sk + group by i_item_id) + select ss_items.item_id + ,ss_item_rev + ,ss_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ss_dev + ,cs_item_rev + ,cs_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 cs_dev + ,ws_item_rev + ,ws_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ws_dev + ,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average + from ss_items,cs_items,ws_items + where ss_items.item_id=cs_items.item_id + and ss_items.item_id=ws_items.item_id + and ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + order by item_id + ,ss_item_rev + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q59.sql 
b/crates/benchmarks/queries/tpcds/q59.sql new file mode 100644 index 0000000000..e10c0dbf61 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q59.sql @@ -0,0 +1,45 @@ +-- SQLBench-DS query 59 derived from TPC-DS query 59 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +with wss as + (select d_week_seq, + ss_store_sk, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + group by d_week_seq,ss_store_sk + ) + select s_store_name1,s_store_id1,d_week_seq1 + ,sun_sales1/sun_sales2,mon_sales1/mon_sales2 + ,tue_sales1/tue_sales2,wed_sales1/wed_sales2,thu_sales1/thu_sales2 + ,fri_sales1/fri_sales2,sat_sales1/sat_sales2 + from + (select s_store_name s_store_name1,wss.d_week_seq d_week_seq1 + ,s_store_id s_store_id1,sun_sales sun_sales1 + ,mon_sales mon_sales1,tue_sales tue_sales1 + ,wed_sales wed_sales1,thu_sales thu_sales1 + ,fri_sales fri_sales1,sat_sales sat_sales1 + from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1206 and 1206 + 11) y, + (select s_store_name s_store_name2,wss.d_week_seq d_week_seq2 + ,s_store_id s_store_id2,sun_sales sun_sales2 + ,mon_sales mon_sales2,tue_sales tue_sales2 + ,wed_sales wed_sales2,thu_sales thu_sales2 + ,fri_sales fri_sales2,sat_sales sat_sales2 
+ from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1206+ 12 and 1206 + 23) x + where s_store_id1=s_store_id2 + and d_week_seq1=d_week_seq2-52 + order by s_store_name1,s_store_id1,d_week_seq1 + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q6.sql b/crates/benchmarks/queries/tpcds/q6.sql new file mode 100644 index 0000000000..098db850c8 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q6.sql @@ -0,0 +1,27 @@ +-- SQLBench-DS query 6 derived from TPC-DS query 6 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select a.ca_state state, count(*) cnt + from customer_address a + ,customer c + ,store_sales s + ,date_dim d + ,item i + where a.ca_address_sk = c.c_current_addr_sk + and c.c_customer_sk = s.ss_customer_sk + and s.ss_sold_date_sk = d.d_date_sk + and s.ss_item_sk = i.i_item_sk + and d.d_month_seq = + (select distinct (d_month_seq) + from date_dim + where d_year = 1998 + and d_moy = 3 ) + and i.i_current_price > 1.2 * + (select avg(j.i_current_price) + from item j + where j.i_category = i.i_category) + group by a.ca_state + having count(*) >= 10 + order by cnt, a.ca_state + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q60.sql b/crates/benchmarks/queries/tpcds/q60.sql new file mode 100644 index 0000000000..1e088c1605 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q60.sql @@ -0,0 +1,79 @@ +-- SQLBench-DS query 60 derived from TPC-DS query 60 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+with ss as ( + select + i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Shoes')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 2001 + and d_moy = 10 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + cs as ( + select + i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Shoes')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 2001 + and d_moy = 10 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + ws as ( + select + i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Shoes')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 2001 + and d_moy = 10 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id) + select + i_item_id +,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by i_item_id + ,total_sales + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q61.sql b/crates/benchmarks/queries/tpcds/q61.sql new file mode 100644 index 0000000000..6d6c2a5fcb --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q61.sql @@ -0,0 +1,45 @@ +-- SQLBench-DS query 61 derived from TPC-DS query 61 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 +from + (select sum(ss_ext_sales_price) promotions + from store_sales + ,store + ,promotion + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_promo_sk = p_promo_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -6 + and i_category = 'Sports' + and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') + and s_gmt_offset = -6 + and d_year = 2002 + and d_moy = 11) promotional_sales, + (select sum(ss_ext_sales_price) total + from store_sales + ,store + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -6 + and i_category = 'Sports' + and s_gmt_offset = -6 + and d_year = 2002 + and d_moy = 11) all_sales +order by promotions, total + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q62.sql b/crates/benchmarks/queries/tpcds/q62.sql new file mode 100644 index 0000000000..d0138e057b --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q62.sql @@ -0,0 +1,36 @@ +-- SQLBench-DS query 62 derived from TPC-DS query 62 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select + substr(w_warehouse_name,1,20) + ,sm_type + ,web_name + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 30) and + (ws_ship_date_sk - ws_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 60) and + (ws_ship_date_sk - ws_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 90) and + (ws_ship_date_sk - ws_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + web_sales + ,warehouse + ,ship_mode + ,web_site + ,date_dim +where + d_month_seq between 1217 and 1217 + 11 +and ws_ship_date_sk = d_date_sk +and ws_warehouse_sk = w_warehouse_sk +and ws_ship_mode_sk = sm_ship_mode_sk +and ws_web_site_sk = web_site_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,web_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,web_name + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q63.sql b/crates/benchmarks/queries/tpcds/q63.sql new file mode 100644 index 0000000000..3d85a2e38b --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q63.sql @@ -0,0 +1,30 @@ +-- SQLBench-DS query 63 derived from TPC-DS query 63 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select * +from (select i_manager_id + ,sum(ss_sales_price) sum_sales + ,avg(sum(ss_sales_price)) over (partition by i_manager_id) avg_monthly_sales + from item + ,store_sales + ,date_dim + ,store + where ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and d_month_seq in (1181,1181+1,1181+2,1181+3,1181+4,1181+5,1181+6,1181+7,1181+8,1181+9,1181+10,1181+11) + and (( i_category in ('Books','Children','Electronics') + and i_class in ('personal','portable','reference','self-help') + and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) + or( i_category in ('Women','Music','Men') + and i_class in ('accessories','classical','fragrances','pants') + and i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manager_id, d_moy) tmp1 +where case when avg_monthly_sales > 0 then abs (sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 +order by i_manager_id + ,avg_monthly_sales + ,sum_sales + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q64.sql b/crates/benchmarks/queries/tpcds/q64.sql new file mode 100644 index 0000000000..0350cdc7d0 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q64.sql @@ -0,0 +1,122 @@ +-- SQLBench-DS query 64 derived from TPC-DS query 64 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+with cs_ui as + (select cs_item_sk + ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund + from catalog_sales + ,catalog_returns + where cs_item_sk = cr_item_sk + and cs_order_number = cr_order_number + group by cs_item_sk + having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)), +cross_sales as + (select i_product_name product_name + ,i_item_sk item_sk + ,s_store_name store_name + ,s_zip store_zip + ,ad1.ca_street_number b_street_number + ,ad1.ca_street_name b_street_name + ,ad1.ca_city b_city + ,ad1.ca_zip b_zip + ,ad2.ca_street_number c_street_number + ,ad2.ca_street_name c_street_name + ,ad2.ca_city c_city + ,ad2.ca_zip c_zip + ,d1.d_year as syear + ,d2.d_year as fsyear + ,d3.d_year s2year + ,count(*) cnt + ,sum(ss_wholesale_cost) s1 + ,sum(ss_list_price) s2 + ,sum(ss_coupon_amt) s3 + FROM store_sales + ,store_returns + ,cs_ui + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,customer + ,customer_demographics cd1 + ,customer_demographics cd2 + ,promotion + ,household_demographics hd1 + ,household_demographics hd2 + ,customer_address ad1 + ,customer_address ad2 + ,income_band ib1 + ,income_band ib2 + ,item + WHERE ss_store_sk = s_store_sk AND + ss_sold_date_sk = d1.d_date_sk AND + ss_customer_sk = c_customer_sk AND + ss_cdemo_sk= cd1.cd_demo_sk AND + ss_hdemo_sk = hd1.hd_demo_sk AND + ss_addr_sk = ad1.ca_address_sk and + ss_item_sk = i_item_sk and + ss_item_sk = sr_item_sk and + ss_ticket_number = sr_ticket_number and + ss_item_sk = cs_ui.cs_item_sk and + c_current_cdemo_sk = cd2.cd_demo_sk AND + c_current_hdemo_sk = hd2.hd_demo_sk AND + c_current_addr_sk = ad2.ca_address_sk and + c_first_sales_date_sk = d2.d_date_sk and + c_first_shipto_date_sk = d3.d_date_sk and + ss_promo_sk = p_promo_sk and + hd1.hd_income_band_sk = ib1.ib_income_band_sk and + hd2.hd_income_band_sk = ib2.ib_income_band_sk and + cd1.cd_marital_status <> cd2.cd_marital_status and + i_color in 
('light','cyan','burnished','green','almond','smoke') and + i_current_price between 22 and 22 + 10 and + i_current_price between 22 + 1 and 22 + 15 +group by i_product_name + ,i_item_sk + ,s_store_name + ,s_zip + ,ad1.ca_street_number + ,ad1.ca_street_name + ,ad1.ca_city + ,ad1.ca_zip + ,ad2.ca_street_number + ,ad2.ca_street_name + ,ad2.ca_city + ,ad2.ca_zip + ,d1.d_year + ,d2.d_year + ,d3.d_year +) +select cs1.product_name + ,cs1.store_name + ,cs1.store_zip + ,cs1.b_street_number + ,cs1.b_street_name + ,cs1.b_city + ,cs1.b_zip + ,cs1.c_street_number + ,cs1.c_street_name + ,cs1.c_city + ,cs1.c_zip + ,cs1.syear + ,cs1.cnt + ,cs1.s1 as s11 + ,cs1.s2 as s21 + ,cs1.s3 as s31 + ,cs2.s1 as s12 + ,cs2.s2 as s22 + ,cs2.s3 as s32 + ,cs2.syear + ,cs2.cnt +from cross_sales cs1,cross_sales cs2 +where cs1.item_sk=cs2.item_sk and + cs1.syear = 2001 and + cs2.syear = 2001 + 1 and + cs2.cnt <= cs1.cnt and + cs1.store_name = cs2.store_name and + cs1.store_zip = cs2.store_zip +order by cs1.product_name + ,cs1.store_name + ,cs2.cnt + ,cs1.s1 + ,cs2.s1; + diff --git a/crates/benchmarks/queries/tpcds/q65.sql b/crates/benchmarks/queries/tpcds/q65.sql new file mode 100644 index 0000000000..0c13a0debf --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q65.sql @@ -0,0 +1,30 @@ +-- SQLBench-DS query 65 derived from TPC-DS query 65 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select + s_store_name, + i_item_desc, + sc.revenue, + i_current_price, + i_wholesale_cost, + i_brand + from store, item, + (select ss_store_sk, avg(revenue) as ave + from + (select ss_store_sk, ss_item_sk, + sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1186 and 1186+11 + group by ss_store_sk, ss_item_sk) sa + group by ss_store_sk) sb, + (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1186 and 1186+11 + group by ss_store_sk, ss_item_sk) sc + where sb.ss_store_sk = sc.ss_store_sk and + sc.revenue <= 0.1 * sb.ave and + s_store_sk = sc.ss_store_sk and + i_item_sk = sc.ss_item_sk + order by s_store_name, i_item_desc + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q66.sql b/crates/benchmarks/queries/tpcds/q66.sql new file mode 100644 index 0000000000..ba066a561d --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q66.sql @@ -0,0 +1,221 @@ +-- SQLBench-DS query 66 derived from TPC-DS query 66 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + ,sum(jan_sales) as jan_sales + ,sum(feb_sales) as feb_sales + ,sum(mar_sales) as mar_sales + ,sum(apr_sales) as apr_sales + ,sum(may_sales) as may_sales + ,sum(jun_sales) as jun_sales + ,sum(jul_sales) as jul_sales + ,sum(aug_sales) as aug_sales + ,sum(sep_sales) as sep_sales + ,sum(oct_sales) as oct_sales + ,sum(nov_sales) as nov_sales + ,sum(dec_sales) as dec_sales + ,sum(jan_sales/w_warehouse_sq_ft) as jan_sales_per_sq_foot + ,sum(feb_sales/w_warehouse_sq_ft) as feb_sales_per_sq_foot + ,sum(mar_sales/w_warehouse_sq_ft) as mar_sales_per_sq_foot + ,sum(apr_sales/w_warehouse_sq_ft) as apr_sales_per_sq_foot + ,sum(may_sales/w_warehouse_sq_ft) as may_sales_per_sq_foot + ,sum(jun_sales/w_warehouse_sq_ft) as jun_sales_per_sq_foot + ,sum(jul_sales/w_warehouse_sq_ft) as jul_sales_per_sq_foot + ,sum(aug_sales/w_warehouse_sq_ft) as aug_sales_per_sq_foot + ,sum(sep_sales/w_warehouse_sq_ft) as sep_sales_per_sq_foot + ,sum(oct_sales/w_warehouse_sq_ft) as oct_sales_per_sq_foot + ,sum(nov_sales/w_warehouse_sq_ft) as nov_sales_per_sq_foot + ,sum(dec_sales/w_warehouse_sq_ft) as dec_sales_per_sq_foot + ,sum(jan_net) as jan_net + ,sum(feb_net) as feb_net + ,sum(mar_net) as mar_net + ,sum(apr_net) as apr_net + ,sum(may_net) as may_net + ,sum(jun_net) as jun_net + ,sum(jul_net) as jul_net + ,sum(aug_net) as aug_net + ,sum(sep_net) as sep_net + ,sum(oct_net) as oct_net + ,sum(nov_net) as nov_net + ,sum(dec_net) as dec_net + from ( + select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'FEDEX' || ',' || 'GERMA' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then ws_ext_list_price* ws_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then ws_ext_list_price* ws_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then ws_ext_list_price* ws_quantity else 0 end) as mar_sales + ,sum(case 
when d_moy = 4 + then ws_ext_list_price* ws_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then ws_ext_list_price* ws_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then ws_ext_list_price* ws_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then ws_ext_list_price* ws_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then ws_ext_list_price* ws_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then ws_ext_list_price* ws_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then ws_ext_list_price* ws_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then ws_ext_list_price* ws_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then ws_ext_list_price* ws_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then ws_net_profit * ws_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then ws_net_profit * ws_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then ws_net_profit * ws_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then ws_net_profit * ws_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then ws_net_profit * ws_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then ws_net_profit * ws_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then ws_net_profit * ws_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then ws_net_profit * ws_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then ws_net_profit * ws_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then ws_net_profit * ws_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then ws_net_profit * ws_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then ws_net_profit * ws_quantity else 0 end) as dec_net + from + web_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + ws_warehouse_sk = w_warehouse_sk + and ws_sold_date_sk = d_date_sk + and ws_sold_time_sk = 
t_time_sk + and ws_ship_mode_sk = sm_ship_mode_sk + and d_year = 2001 + and t_time between 19072 and 19072+28800 + and sm_carrier in ('FEDEX','GERMA') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + union all + select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'FEDEX' || ',' || 'GERMA' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then cs_sales_price* cs_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then cs_sales_price* cs_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then cs_sales_price* cs_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then cs_sales_price* cs_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then cs_sales_price* cs_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then cs_sales_price* cs_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then cs_sales_price* cs_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then cs_sales_price* cs_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then cs_sales_price* cs_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then cs_sales_price* cs_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then cs_sales_price* cs_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then cs_sales_price* cs_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then cs_net_paid * cs_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then cs_net_paid * cs_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then cs_net_paid * cs_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then cs_net_paid * cs_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then cs_net_paid * cs_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then cs_net_paid * cs_quantity else 0 end) as jun_net + ,sum(case when d_moy = 
7 + then cs_net_paid * cs_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then cs_net_paid * cs_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then cs_net_paid * cs_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then cs_net_paid * cs_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then cs_net_paid * cs_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then cs_net_paid * cs_quantity else 0 end) as dec_net + from + catalog_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and cs_sold_time_sk = t_time_sk + and cs_ship_mode_sk = sm_ship_mode_sk + and d_year = 2001 + and t_time between 19072 AND 19072+28800 + and sm_carrier in ('FEDEX','GERMA') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + ) x + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + order by w_warehouse_name + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q67.sql b/crates/benchmarks/queries/tpcds/q67.sql new file mode 100644 index 0000000000..7d684e6745 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q67.sql @@ -0,0 +1,45 @@ +-- SQLBench-DS query 67 derived from TPC-DS query 67 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select * +from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rank() over (partition by i_category order by sumsales desc) rk + from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sum(coalesce(ss_sales_price*ss_quantity,0)) sumsales + from store_sales + ,date_dim + ,store + ,item + where ss_sold_date_sk=d_date_sk + and ss_item_sk=i_item_sk + and ss_store_sk = s_store_sk + and d_month_seq between 1194 and 1194+11 + group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2 +where rk <= 100 +order by i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rk + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q68.sql b/crates/benchmarks/queries/tpcds/q68.sql new file mode 100644 index 0000000000..242e0dbd93 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q68.sql @@ -0,0 +1,43 @@ +-- SQLBench-DS query 68 derived from TPC-DS query 68 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,extended_price + ,extended_tax + ,list_price + from (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_ext_sales_price) extended_price + ,sum(ss_ext_list_price) list_price + ,sum(ss_ext_tax) extended_tax + from store_sales + ,date_dim + ,store + ,household_demographics + ,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_dep_count = 8 or + household_demographics.hd_vehicle_count= 3) + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_city in ('Midway','Fairview') + group by ss_ticket_number + ,ss_customer_sk + ,ss_addr_sk,ca_city) dn + ,customer + ,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,ss_ticket_number + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q69.sql b/crates/benchmarks/queries/tpcds/q69.sql new file mode 100644 index 0000000000..4d4030cf59 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q69.sql @@ -0,0 +1,48 @@ +-- SQLBench-DS query 69 derived from TPC-DS query 69 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_state in ('IN','VA','MS') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 2 and 2+2) and + (not exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 2 and 2+2) and + not exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 2 and 2+2)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q7.sql b/crates/benchmarks/queries/tpcds/q7.sql new file mode 100644 index 0000000000..bb58851616 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q7.sql @@ -0,0 +1,22 @@ +-- SQLBench-DS query 7 derived from TPC-DS query 7 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select i_item_id, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, item, promotion + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_cdemo_sk = cd_demo_sk and + ss_promo_sk = p_promo_sk and + cd_gender = 'M' and + cd_marital_status = 'M' and + cd_education_status = '4 yr Degree' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 2001 + group by i_item_id + order by i_item_id + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q70.sql b/crates/benchmarks/queries/tpcds/q70.sql new file mode 100644 index 0000000000..a8b5f1c99f --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q70.sql @@ -0,0 +1,39 @@ +-- SQLBench-DS query 70 derived from TPC-DS query 70 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select + sum(ss_net_profit) as total_sum + ,s_state + ,s_county + ,grouping(s_state)+grouping(s_county) as lochierarchy + ,rank() over ( + partition by grouping(s_state)+grouping(s_county), + case when grouping(s_county) = 0 then s_state end + order by sum(ss_net_profit) desc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,store + where + d1.d_month_seq between 1180 and 1180+11 + and d1.d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + and s_state in + ( select s_state + from (select s_state as s_state, + rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking + from store_sales, store, date_dim + where d_month_seq between 1180 and 1180+11 + and d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + group by s_state + ) tmp1 + where ranking <= 5 + ) + group by rollup(s_state,s_county) + order by + lochierarchy desc + ,case when lochierarchy = 0 then s_state end + ,rank_within_parent + LIMIT 100; + diff --git 
a/crates/benchmarks/queries/tpcds/q71.sql b/crates/benchmarks/queries/tpcds/q71.sql new file mode 100644 index 0000000000..90d00806b0 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q71.sql @@ -0,0 +1,41 @@ +-- SQLBench-DS query 71 derived from TPC-DS query 71 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select i_brand_id brand_id, i_brand brand,t_hour,t_minute, + sum(ext_price) ext_price + from item, (select ws_ext_sales_price as ext_price, + ws_sold_date_sk as sold_date_sk, + ws_item_sk as sold_item_sk, + ws_sold_time_sk as time_sk + from web_sales,date_dim + where d_date_sk = ws_sold_date_sk + and d_moy=11 + and d_year=2001 + union all + select cs_ext_sales_price as ext_price, + cs_sold_date_sk as sold_date_sk, + cs_item_sk as sold_item_sk, + cs_sold_time_sk as time_sk + from catalog_sales,date_dim + where d_date_sk = cs_sold_date_sk + and d_moy=11 + and d_year=2001 + union all + select ss_ext_sales_price as ext_price, + ss_sold_date_sk as sold_date_sk, + ss_item_sk as sold_item_sk, + ss_sold_time_sk as time_sk + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + and d_moy=11 + and d_year=2001 + ) tmp,time_dim + where + sold_item_sk = i_item_sk + and i_manager_id=1 + and time_sk = t_time_sk + and (t_meal_time = 'breakfast' or t_meal_time = 'dinner') + group by i_brand, i_brand_id,t_hour,t_minute + order by ext_price desc, i_brand_id + ; + diff --git a/crates/benchmarks/queries/tpcds/q72.sql b/crates/benchmarks/queries/tpcds/q72.sql new file mode 100644 index 0000000000..0e31057a03 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q72.sql @@ -0,0 +1,30 @@ +-- SQLBench-DS query 72 derived from TPC-DS query 72 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select i_item_desc + ,w_warehouse_name + ,d1.d_week_seq + ,sum(case when p_promo_sk is null then 1 else 0 end) no_promo + ,sum(case when p_promo_sk is not null then 1 else 0 end) promo + ,count(*) total_cnt +from catalog_sales +join inventory on (cs_item_sk = inv_item_sk) +join warehouse on (w_warehouse_sk=inv_warehouse_sk) +join item on (i_item_sk = cs_item_sk) +join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk) +join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk) +join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk) +join date_dim d2 on (inv_date_sk = d2.d_date_sk) +join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk) +left outer join promotion on (cs_promo_sk=p_promo_sk) +left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number) +where d1.d_week_seq = d2.d_week_seq + and inv_quantity_on_hand < cs_quantity + and d3.d_date > d1.d_date + 5 + and hd_buy_potential = '501-1000' + and d1.d_year = 1999 + and cd_marital_status = 'S' +group by i_item_desc,w_warehouse_name,d1.d_week_seq +order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q72_optimized.sql b/crates/benchmarks/queries/tpcds/q72_optimized.sql new file mode 100644 index 0000000000..a98a70e8f0 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q72_optimized.sql @@ -0,0 +1,32 @@ +-- SQLBench-DS query 72 derived from TPC-DS query 72 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. 
+ +-- This is a modified version of q72 that changes the join order to be sensible (the original q72 +-- intentionally has a terrible join order for testing database vendors' join reordering rules) + +select i_item_desc + ,w_warehouse_name + ,d1.d_week_seq + ,sum(case when p_promo_sk is null then 1 else 0 end) no_promo + ,sum(case when p_promo_sk is not null then 1 else 0 end) promo + ,count(*) total_cnt +from catalog_sales + join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk) + join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk) + join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk) + join item on (i_item_sk = cs_item_sk) + join inventory on (cs_item_sk = inv_item_sk) + join warehouse on (w_warehouse_sk=inv_warehouse_sk) + join date_dim d2 on (inv_date_sk = d2.d_date_sk) + join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk) + left outer join promotion on (cs_promo_sk=p_promo_sk) + left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number) +where d1.d_week_seq = d2.d_week_seq + and inv_quantity_on_hand < cs_quantity + and d3.d_date > d1.d_date + 5 + and hd_buy_potential = '501-1000' + and d1.d_year = 1999 + and cd_marital_status = 'S' +group by i_item_desc,w_warehouse_name,d1.d_week_seq +order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq +LIMIT 100; \ No newline at end of file diff --git a/crates/benchmarks/queries/tpcds/q73.sql b/crates/benchmarks/queries/tpcds/q73.sql new file mode 100644 index 0000000000..e7879d09ff --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q73.sql @@ -0,0 +1,29 @@ +-- SQLBench-DS query 73 derived from TPC-DS query 73 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_buy_potential = '1001-5000' or + household_demographics.hd_buy_potential = '5001-10000') + and household_demographics.hd_vehicle_count > 0 + and case when household_demographics.hd_vehicle_count > 0 then + household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1 + and date_dim.d_year in (1999,1999+1,1999+2) + and store.s_county in ('Williamson County','Williamson County','Williamson County','Williamson County') + group by ss_ticket_number,ss_customer_sk) dj,customer + where ss_customer_sk = c_customer_sk + and cnt between 1 and 5 + order by cnt desc, c_last_name asc; + diff --git a/crates/benchmarks/queries/tpcds/q74.sql b/crates/benchmarks/queries/tpcds/q74.sql new file mode 100644 index 0000000000..b9829d9d5e --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q74.sql @@ -0,0 +1,62 @@ +-- SQLBench-DS query 74 derived from TPC-DS query 74 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,stddev_samp(ss_net_paid) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,stddev_samp(ws_net_paid) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ) + select + t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.year = 2001 + and t_s_secyear.year = 2001+1 + and t_w_firstyear.year = 2001 + and t_w_secyear.year = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by 3,2,1 + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q75.sql b/crates/benchmarks/queries/tpcds/q75.sql new file mode 100644 index 0000000000..cec9da56a5 --- /dev/null +++ 
b/crates/benchmarks/queries/tpcds/q75.sql @@ -0,0 +1,71 @@ +-- SQLBench-DS query 75 derived from TPC-DS query 75 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +WITH all_sales AS ( + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,SUM(sales_cnt) AS sales_cnt + ,SUM(sales_amt) AS sales_amt + FROM (SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt + ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt + FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk + JOIN date_dim ON d_date_sk=cs_sold_date_sk + LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number + AND cs_item_sk=cr_item_sk) + WHERE i_category='Shoes' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt + ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt + FROM store_sales JOIN item ON i_item_sk=ss_item_sk + JOIN date_dim ON d_date_sk=ss_sold_date_sk + LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number + AND ss_item_sk=sr_item_sk) + WHERE i_category='Shoes' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt + ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt + FROM web_sales JOIN item ON i_item_sk=ws_item_sk + JOIN date_dim ON d_date_sk=ws_sold_date_sk + LEFT JOIN web_returns ON (ws_order_number=wr_order_number + AND ws_item_sk=wr_item_sk) + WHERE i_category='Shoes') sales_detail + GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) + SELECT prev_yr.d_year AS prev_year + ,curr_yr.d_year AS year + ,curr_yr.i_brand_id + ,curr_yr.i_class_id + ,curr_yr.i_category_id + ,curr_yr.i_manufact_id + ,prev_yr.sales_cnt 
AS prev_yr_cnt + ,curr_yr.sales_cnt AS curr_yr_cnt + ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff + ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff + FROM all_sales curr_yr, all_sales prev_yr + WHERE curr_yr.i_brand_id=prev_yr.i_brand_id + AND curr_yr.i_class_id=prev_yr.i_class_id + AND curr_yr.i_category_id=prev_yr.i_category_id + AND curr_yr.i_manufact_id=prev_yr.i_manufact_id + AND curr_yr.d_year=2000 + AND prev_yr.d_year=2000-1 + AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 + ORDER BY sales_cnt_diff,sales_amt_diff + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q76.sql b/crates/benchmarks/queries/tpcds/q76.sql new file mode 100644 index 0000000000..931a1334f6 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q76.sql @@ -0,0 +1,25 @@ +-- SQLBench-DS query 76 derived from TPC-DS query 76 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( + SELECT 'store' as channel, 'ss_customer_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price + FROM store_sales, item, date_dim + WHERE ss_customer_sk IS NULL + AND ss_sold_date_sk=d_date_sk + AND ss_item_sk=i_item_sk + UNION ALL + SELECT 'web' as channel, 'ws_ship_hdemo_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price + FROM web_sales, item, date_dim + WHERE ws_ship_hdemo_sk IS NULL + AND ws_sold_date_sk=d_date_sk + AND ws_item_sk=i_item_sk + UNION ALL + SELECT 'catalog' as channel, 'cs_bill_customer_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price + FROM catalog_sales, item, date_dim + WHERE cs_bill_customer_sk IS NULL + AND cs_sold_date_sk=d_date_sk + AND cs_item_sk=i_item_sk) foo +GROUP BY channel, col_name, d_year, d_qoy, i_category +ORDER BY channel, col_name, d_year, d_qoy, i_category + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q77.sql b/crates/benchmarks/queries/tpcds/q77.sql new file mode 100644 index 0000000000..d04bc14bc9 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q77.sql @@ -0,0 +1,109 @@ +-- SQLBench-DS query 77 derived from TPC-DS query 77 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+with ss as + (select s_store_sk, + sum(ss_ext_sales_price) as sales, + sum(ss_net_profit) as profit + from store_sales, + date_dim, + store + where ss_sold_date_sk = d_date_sk + and d_date between cast('2001-08-11' as date) + and (cast('2001-08-11' as date) + INTERVAL '30 DAYS') + and ss_store_sk = s_store_sk + group by s_store_sk) + , + sr as + (select s_store_sk, + sum(sr_return_amt) as returns, + sum(sr_net_loss) as profit_loss + from store_returns, + date_dim, + store + where sr_returned_date_sk = d_date_sk + and d_date between cast('2001-08-11' as date) + and (cast('2001-08-11' as date) + INTERVAL '30 DAYS') + and sr_store_sk = s_store_sk + group by s_store_sk), + cs as + (select cs_call_center_sk, + sum(cs_ext_sales_price) as sales, + sum(cs_net_profit) as profit + from catalog_sales, + date_dim + where cs_sold_date_sk = d_date_sk + and d_date between cast('2001-08-11' as date) + and (cast('2001-08-11' as date) + INTERVAL '30 DAYS') + group by cs_call_center_sk + ), + cr as + (select cr_call_center_sk, + sum(cr_return_amount) as returns, + sum(cr_net_loss) as profit_loss + from catalog_returns, + date_dim + where cr_returned_date_sk = d_date_sk + and d_date between cast('2001-08-11' as date) + and (cast('2001-08-11' as date) + INTERVAL '30 DAYS') + group by cr_call_center_sk + ), + ws as + ( select wp_web_page_sk, + sum(ws_ext_sales_price) as sales, + sum(ws_net_profit) as profit + from web_sales, + date_dim, + web_page + where ws_sold_date_sk = d_date_sk + and d_date between cast('2001-08-11' as date) + and (cast('2001-08-11' as date) + INTERVAL '30 DAYS') + and ws_web_page_sk = wp_web_page_sk + group by wp_web_page_sk), + wr as + (select wp_web_page_sk, + sum(wr_return_amt) as returns, + sum(wr_net_loss) as profit_loss + from web_returns, + date_dim, + web_page + where wr_returned_date_sk = d_date_sk + and d_date between cast('2001-08-11' as date) + and (cast('2001-08-11' as date) + INTERVAL '30 DAYS') + and wr_web_page_sk = wp_web_page_sk + group by 
wp_web_page_sk) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , ss.s_store_sk as id + , sales + , coalesce(returns, 0) as returns + , (profit - coalesce(profit_loss,0)) as profit + from ss left join sr + on ss.s_store_sk = sr.s_store_sk + union all + select 'catalog channel' as channel + , cs_call_center_sk as id + , sales + , returns + , (profit - profit_loss) as profit + from cs + , cr + union all + select 'web channel' as channel + , ws.wp_web_page_sk as id + , sales + , coalesce(returns, 0) returns + , (profit - coalesce(profit_loss,0)) as profit + from ws left join wr + on ws.wp_web_page_sk = wr.wp_web_page_sk + ) x + group by rollup (channel, id) + order by channel + ,id + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q78.sql b/crates/benchmarks/queries/tpcds/q78.sql new file mode 100644 index 0000000000..927ef63561 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q78.sql @@ -0,0 +1,59 @@ +-- SQLBench-DS query 78 derived from TPC-DS query 78 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+with ws as + (select d_year AS ws_sold_year, ws_item_sk, + ws_bill_customer_sk ws_customer_sk, + sum(ws_quantity) ws_qty, + sum(ws_wholesale_cost) ws_wc, + sum(ws_sales_price) ws_sp + from web_sales + left join web_returns on wr_order_number=ws_order_number and ws_item_sk=wr_item_sk + join date_dim on ws_sold_date_sk = d_date_sk + where wr_order_number is null + group by d_year, ws_item_sk, ws_bill_customer_sk + ), +cs as + (select d_year AS cs_sold_year, cs_item_sk, + cs_bill_customer_sk cs_customer_sk, + sum(cs_quantity) cs_qty, + sum(cs_wholesale_cost) cs_wc, + sum(cs_sales_price) cs_sp + from catalog_sales + left join catalog_returns on cr_order_number=cs_order_number and cs_item_sk=cr_item_sk + join date_dim on cs_sold_date_sk = d_date_sk + where cr_order_number is null + group by d_year, cs_item_sk, cs_bill_customer_sk + ), +ss as + (select d_year AS ss_sold_year, ss_item_sk, + ss_customer_sk, + sum(ss_quantity) ss_qty, + sum(ss_wholesale_cost) ss_wc, + sum(ss_sales_price) ss_sp + from store_sales + left join store_returns on sr_ticket_number=ss_ticket_number and ss_item_sk=sr_item_sk + join date_dim on ss_sold_date_sk = d_date_sk + where sr_ticket_number is null + group by d_year, ss_item_sk, ss_customer_sk + ) + select +ss_customer_sk, +round(ss_qty/(coalesce(ws_qty,0)+coalesce(cs_qty,0)),2) ratio, +ss_qty store_qty, ss_wc store_wholesale_cost, ss_sp store_sales_price, +coalesce(ws_qty,0)+coalesce(cs_qty,0) other_chan_qty, +coalesce(ws_wc,0)+coalesce(cs_wc,0) other_chan_wholesale_cost, +coalesce(ws_sp,0)+coalesce(cs_sp,0) other_chan_sales_price +from ss +left join ws on (ws_sold_year=ss_sold_year and ws_item_sk=ss_item_sk and ws_customer_sk=ss_customer_sk) +left join cs on (cs_sold_year=ss_sold_year and cs_item_sk=ss_item_sk and cs_customer_sk=ss_customer_sk) +where (coalesce(ws_qty,0)>0 or coalesce(cs_qty, 0)>0) and ss_sold_year=2001 +order by + ss_customer_sk, + ss_qty desc, ss_wc desc, ss_sp desc, + other_chan_qty, + other_chan_wholesale_cost, + 
other_chan_sales_price, + ratio + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q79.sql b/crates/benchmarks/queries/tpcds/q79.sql new file mode 100644 index 0000000000..568444b152 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q79.sql @@ -0,0 +1,24 @@ +-- SQLBench-DS query 79 derived from TPC-DS query 79 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select + c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,store.s_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (household_demographics.hd_dep_count = 0 or household_demographics.hd_vehicle_count > 4) + and date_dim.d_dow = 1 + and date_dim.d_year in (1999,1999+1,1999+2) + and store.s_number_employees between 200 and 295 + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer + where ss_customer_sk = c_customer_sk + order by c_last_name,c_first_name,substr(s_city,1,30), profit + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q8.sql b/crates/benchmarks/queries/tpcds/q8.sql new file mode 100644 index 0000000000..0a994b4d21 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q8.sql @@ -0,0 +1,109 @@ +-- SQLBench-DS query 8 derived from TPC-DS query 8 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select s_store_name + ,sum(ss_net_profit) + from store_sales + ,date_dim + ,store, + (select ca_zip + from ( + SELECT substr(ca_zip,1,5) ca_zip + FROM customer_address + WHERE substr(ca_zip,1,5) IN ( + '19100','41548','51640','49699','88329','55986', + '85119','19510','61020','95452','26235', + '51102','16733','42819','27823','90192', + '31905','28865','62197','23750','81398', + '95288','45114','82060','12313','25218', + '64386','46400','77230','69271','43672', + '36521','34217','13017','27936','42766', + '59233','26060','27477','39981','93402', + '74270','13932','51731','71642','17710', + '85156','21679','70840','67191','39214', + '35273','27293','17128','15458','31615', + '60706','67657','54092','32775','14683', + '32206','62543','43053','11297','58216', + '49410','14710','24501','79057','77038', + '91286','32334','46298','18326','67213', + '65382','40315','56115','80162','55956', + '81583','73588','32513','62880','12201', + '11592','17014','83832','61796','57872', + '78829','69912','48524','22016','26905', + '48511','92168','63051','25748','89786', + '98827','86404','53029','37524','14039', + '50078','34487','70142','18697','40129', + '60642','42810','62667','57183','46414', + '58463','71211','46364','34851','54884', + '25382','25239','74126','21568','84204', + '13607','82518','32982','36953','86001', + '79278','21745','64444','35199','83181', + '73255','86177','98043','90392','13882', + '47084','17859','89526','42072','20233', + '52745','75000','22044','77013','24182', + '52554','56138','43440','86100','48791', + '21883','17096','15965','31196','74903', + '19810','35763','92020','55176','54433', + '68063','71919','44384','16612','32109', + '28207','14762','89933','10930','27616', + '56809','14244','22733','33177','29784', + '74968','37887','11299','34692','85843', + '83663','95421','19323','17406','69264', + '28341','50150','79121','73974','92917', + '21229','32254','97408','46011','37169', + '18146','27296','62927','68812','47734', + 
'86572','12620','80252','50173','27261', + '29534','23488','42184','23695','45868', + '12910','23429','29052','63228','30731', + '15747','25827','22332','62349','56661', + '44652','51862','57007','22773','40361', + '65238','19327','17282','44708','35484', + '34064','11148','92729','22995','18833', + '77528','48917','17256','93166','68576', + '71096','56499','35096','80551','82424', + '17700','32748','78969','46820','57725', + '46179','54677','98097','62869','83959', + '66728','19716','48326','27420','53458', + '69056','84216','36688','63957','41469', + '66843','18024','81950','21911','58387', + '58103','19813','34581','55347','17171', + '35914','75043','75088','80541','26802', + '28849','22356','57721','77084','46385', + '59255','29308','65885','70673','13306', + '68788','87335','40987','31654','67560', + '92309','78116','65961','45018','16548', + '67092','21818','33716','49449','86150', + '12156','27574','43201','50977','52839', + '33234','86611','71494','17823','57172', + '59869','34086','51052','11320','39717', + '79604','24672','70555','38378','91135', + '15567','21606','74994','77168','38607', + '27384','68328','88944','40203','37893', + '42726','83549','48739','55652','27543', + '23109','98908','28831','45011','47525', + '43870','79404','35780','42136','49317', + '14574','99586','21107','14302','83882', + '81272','92552','14916','87533','86518', + '17862','30741','96288','57886','30304', + '24201','79457','36728','49833','35182', + '20108','39858','10804','47042','20439', + '54708','59027','82499','75311','26548', + '53406','92060','41152','60446','33129', + '43979','16903','60319','35550','33887', + '25463','40343','20726','44429') + intersect + select ca_zip + from (SELECT substr(ca_zip,1,5) ca_zip,count(*) cnt + FROM customer_address, customer + WHERE ca_address_sk = c_current_addr_sk and + c_preferred_cust_flag='Y' + group by ca_zip + having count(*) > 10)A1)A2) V1 + where ss_store_sk = s_store_sk + and ss_sold_date_sk = d_date_sk + and d_qoy = 1 and 
d_year = 2000 + and (substr(s_zip,1,2) = substr(V1.ca_zip,1,2)) + group by s_store_name + order by s_store_name + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q80.sql b/crates/benchmarks/queries/tpcds/q80.sql new file mode 100644 index 0000000000..29b2f87464 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q80.sql @@ -0,0 +1,97 @@ +-- SQLBench-DS query 80 derived from TPC-DS query 80 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +with ssr as + (select s_store_id as store_id, + sum(ss_ext_sales_price) as sales, + sum(coalesce(sr_return_amt, 0)) as returns, + sum(ss_net_profit - coalesce(sr_net_loss, 0)) as profit + from store_sales left outer join store_returns on + (ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number), + date_dim, + store, + item, + promotion + where ss_sold_date_sk = d_date_sk + and d_date between cast('2002-08-04' as date) + and (cast('2002-08-04' as date) + INTERVAL '30 DAYS') + and ss_store_sk = s_store_sk + and ss_item_sk = i_item_sk + and i_current_price > 50 + and ss_promo_sk = p_promo_sk + and p_channel_tv = 'N' + group by s_store_id) + , + csr as + (select cp_catalog_page_id as catalog_page_id, + sum(cs_ext_sales_price) as sales, + sum(coalesce(cr_return_amount, 0)) as returns, + sum(cs_net_profit - coalesce(cr_net_loss, 0)) as profit + from catalog_sales left outer join catalog_returns on + (cs_item_sk = cr_item_sk and cs_order_number = cr_order_number), + date_dim, + catalog_page, + item, + promotion + where cs_sold_date_sk = d_date_sk + and d_date between cast('2002-08-04' as date) + and (cast('2002-08-04' as date) + INTERVAL '30 DAYS') + and cs_catalog_page_sk = cp_catalog_page_sk + and cs_item_sk = i_item_sk + and i_current_price > 50 + and cs_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(ws_ext_sales_price) 
as sales, + sum(coalesce(wr_return_amt, 0)) as returns, + sum(ws_net_profit - coalesce(wr_net_loss, 0)) as profit + from web_sales left outer join web_returns on + (ws_item_sk = wr_item_sk and ws_order_number = wr_order_number), + date_dim, + web_site, + item, + promotion + where ws_sold_date_sk = d_date_sk + and d_date between cast('2002-08-04' as date) + and (cast('2002-08-04' as date) + INTERVAL '30 DAYS') + and ws_web_site_sk = web_site_sk + and ws_item_sk = i_item_sk + and i_current_price > 50 + and ws_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || store_id as id + , sales + , returns + , profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || catalog_page_id as id + , sales + , returns + , profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q81.sql b/crates/benchmarks/queries/tpcds/q81.sql new file mode 100644 index 0000000000..8dd4c43067 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q81.sql @@ -0,0 +1,32 @@ +-- SQLBench-DS query 81 derived from TPC-DS query 81 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+with customer_total_return as + (select cr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(cr_return_amt_inc_tax) as ctr_total_return + from catalog_returns + ,date_dim + ,customer_address + where cr_returned_date_sk = d_date_sk + and d_year =1998 + and cr_returning_addr_sk = ca_address_sk + group by cr_returning_customer_sk + ,ca_state ) + select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'TX' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q82.sql b/crates/benchmarks/queries/tpcds/q82.sql new file mode 100644 index 0000000000..faea7a2f67 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q82.sql @@ -0,0 +1,18 @@ +-- SQLBench-DS query 82 derived from TPC-DS query 82 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, store_sales + where i_current_price between 69 and 69+30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('1998-06-06' as date) and (cast('1998-06-06' as date) + INTERVAL '60 DAYS') + and i_manufact_id in (105,513,180,137) + and inv_quantity_on_hand between 100 and 500 + and ss_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q83.sql b/crates/benchmarks/queries/tpcds/q83.sql new file mode 100644 index 0000000000..b2512ed83e --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q83.sql @@ -0,0 +1,68 @@ +-- SQLBench-DS query 83 derived from TPC-DS query 83 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +with sr_items as + (select i_item_id item_id, + sum(sr_return_quantity) sr_item_qty + from store_returns, + item, + date_dim + where sr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2000-04-29','2000-09-09','2000-11-02'))) + and sr_returned_date_sk = d_date_sk + group by i_item_id), + cr_items as + (select i_item_id item_id, + sum(cr_return_quantity) cr_item_qty + from catalog_returns, + item, + date_dim + where cr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2000-04-29','2000-09-09','2000-11-02'))) + and cr_returned_date_sk = d_date_sk + group by i_item_id), + wr_items as + (select i_item_id item_id, + sum(wr_return_quantity) wr_item_qty + from web_returns, + item, + date_dim + where wr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where 
d_date in ('2000-04-29','2000-09-09','2000-11-02'))) + and wr_returned_date_sk = d_date_sk + group by i_item_id) + select sr_items.item_id + ,sr_item_qty + ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev + ,cr_item_qty + ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev + ,wr_item_qty + ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev + ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average + from sr_items + ,cr_items + ,wr_items + where sr_items.item_id=cr_items.item_id + and sr_items.item_id=wr_items.item_id + order by sr_items.item_id + ,sr_item_qty + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q84.sql b/crates/benchmarks/queries/tpcds/q84.sql new file mode 100644 index 0000000000..a07249b463 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q84.sql @@ -0,0 +1,22 @@ +-- SQLBench-DS query 84 derived from TPC-DS query 84 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select c_customer_id as customer_id + , coalesce(c_last_name,'') || ', ' || coalesce(c_first_name,'') as customername + from customer + ,customer_address + ,customer_demographics + ,household_demographics + ,income_band + ,store_returns + where ca_city = 'White Oak' + and c_current_addr_sk = ca_address_sk + and ib_lower_bound >= 45626 + and ib_upper_bound <= 45626 + 50000 + and ib_income_band_sk = hd_income_band_sk + and cd_demo_sk = c_current_cdemo_sk + and hd_demo_sk = c_current_hdemo_sk + and sr_cdemo_sk = cd_demo_sk + order by c_customer_id + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q85.sql b/crates/benchmarks/queries/tpcds/q85.sql new file mode 100644 index 0000000000..c529acfe9e --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q85.sql @@ -0,0 +1,85 @@ +-- SQLBench-DS query 85 derived from TPC-DS query 85 under the terms of the TPC Fair Use Policy. 
+-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) + from web_sales, web_returns, web_page, customer_demographics cd1, + customer_demographics cd2, customer_address, date_dim, reason + where ws_web_page_sk = wp_web_page_sk + and ws_item_sk = wr_item_sk + and ws_order_number = wr_order_number + and ws_sold_date_sk = d_date_sk and d_year = 2001 + and cd1.cd_demo_sk = wr_refunded_cdemo_sk + and cd2.cd_demo_sk = wr_returning_cdemo_sk + and ca_address_sk = wr_refunded_addr_sk + and r_reason_sk = wr_reason_sk + and + ( + ( + cd1.cd_marital_status = 'D' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Primary' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 100.00 and 150.00 + ) + or + ( + cd1.cd_marital_status = 'U' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Unknown' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 50.00 and 100.00 + ) + or + ( + cd1.cd_marital_status = 'M' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Advanced Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ca_country = 'United States' + and + ca_state in ('SC', 'IN', 'VA') + and ws_net_profit between 100 and 200 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('WA', 'KS', 'KY') + and ws_net_profit between 150 and 300 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('SD', 'WI', 'NE') + and ws_net_profit between 50 and 250 + ) + ) +group by r_reason_desc +order by substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) + LIMIT 100; + diff --git 
a/crates/benchmarks/queries/tpcds/q86.sql b/crates/benchmarks/queries/tpcds/q86.sql new file mode 100644 index 0000000000..ed7f4f85d0 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q86.sql @@ -0,0 +1,27 @@ +-- SQLBench-DS query 86 derived from TPC-DS query 86 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select + sum(ws_net_paid) as total_sum + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ws_net_paid) desc) as rank_within_parent + from + web_sales + ,date_dim d1 + ,item + where + d1.d_month_seq between 1205 and 1205+11 + and d1.d_date_sk = ws_sold_date_sk + and i_item_sk = ws_item_sk + group by rollup(i_category,i_class) + order by + lochierarchy desc, + case when lochierarchy = 0 then i_category end, + rank_within_parent + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q87.sql b/crates/benchmarks/queries/tpcds/q87.sql new file mode 100644 index 0000000000..13e2d8e2e7 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q87.sql @@ -0,0 +1,24 @@ +-- SQLBench-DS query 87 derived from TPC-DS query 87 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select count(*) +from ((select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1189 and 1189+11) + except + (select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1189 and 1189+11) + except + (select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1189 and 1189+11) +) cool_cust +; + diff --git a/crates/benchmarks/queries/tpcds/q88.sql b/crates/benchmarks/queries/tpcds/q88.sql new file mode 100644 index 0000000000..8d47334a4e --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q88.sql @@ -0,0 +1,95 @@ +-- SQLBench-DS query 88 derived from TPC-DS query 88 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select * +from + (select count(*) h8_30_to_9 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2)) + and store.s_store_name = 'ese') s1, + (select count(*) h9_to_9_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2)) + and store.s_store_name = 'ese') s2, + (select count(*) h9_30_to_10 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2)) + and store.s_store_name = 'ese') s3, + (select count(*) h10_to_10_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = 
household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2)) + and store.s_store_name = 'ese') s4, + (select count(*) h10_30_to_11 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2)) + and store.s_store_name = 'ese') s5, + (select count(*) h11_to_11_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2)) + and store.s_store_name = 'ese') s6, + (select count(*) h11_30_to_12 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 2 and 
household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2)) + and store.s_store_name = 'ese') s7, + (select count(*) h12_to_12_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 12 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2)) + and store.s_store_name = 'ese') s8 +; + diff --git a/crates/benchmarks/queries/tpcds/q89.sql b/crates/benchmarks/queries/tpcds/q89.sql new file mode 100644 index 0000000000..ac02b6fe33 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q89.sql @@ -0,0 +1,29 @@ +-- SQLBench-DS query 89 derived from TPC-DS query 89 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select * +from( +select i_category, i_class, i_brand, + s_store_name, s_company_name, + d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, s_store_name, s_company_name) + avg_monthly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + d_year in (2001) and + ((i_category in ('Children','Jewelry','Home') and + i_class in ('infants','birdal','flatware') + ) + or (i_category in ('Electronics','Music','Books') and + i_class in ('audio','classical','science') + )) +group by i_category, i_class, i_brand, + s_store_name, s_company_name, d_moy) tmp1 +where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 +order by sum_sales - avg_monthly_sales, s_store_name + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q9.sql b/crates/benchmarks/queries/tpcds/q9.sql new file mode 100644 index 0000000000..cf723ccf29 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q9.sql @@ -0,0 +1,52 @@ +-- SQLBench-DS query 9 derived from TPC-DS query 9 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select case when (select count(*) + from store_sales + where ss_quantity between 1 and 20) > 31002 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 1 and 20) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 1 and 20) end bucket1 , + case when (select count(*) + from store_sales + where ss_quantity between 21 and 40) > 588 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 21 and 40) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 21 and 40) end bucket2, + case when (select count(*) + from store_sales + where ss_quantity between 41 and 60) > 2456 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 41 and 60) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 41 and 60) end bucket3, + case when (select count(*) + from store_sales + where ss_quantity between 61 and 80) > 21645 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 61 and 80) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 61 and 80) end bucket4, + case when (select count(*) + from store_sales + where ss_quantity between 81 and 100) > 20553 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 81 and 100) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 81 and 100) end bucket5 +from reason +where r_reason_sk = 1 +; + diff --git a/crates/benchmarks/queries/tpcds/q90.sql b/crates/benchmarks/queries/tpcds/q90.sql new file mode 100644 index 0000000000..dedf5fd066 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q90.sql @@ -0,0 +1,23 @@ +-- SQLBench-DS query 90 derived from TPC-DS query 90 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio + from ( select count(*) amc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 9 and 9+1 + and household_demographics.hd_dep_count = 2 + and web_page.wp_char_count between 5000 and 5200) at, + ( select count(*) pmc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 15 and 15+1 + and household_demographics.hd_dep_count = 2 + and web_page.wp_char_count between 5000 and 5200) pt + order by am_pm_ratio + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q91.sql b/crates/benchmarks/queries/tpcds/q91.sql new file mode 100644 index 0000000000..894d41bb2b --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q91.sql @@ -0,0 +1,32 @@ +-- SQLBench-DS query 91 derived from TPC-DS query 91 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select + cc_call_center_id Call_Center, + cc_name Call_Center_Name, + cc_manager Manager, + sum(cr_net_loss) Returns_Loss +from + call_center, + catalog_returns, + date_dim, + customer, + customer_address, + customer_demographics, + household_demographics +where + cr_call_center_sk = cc_call_center_sk +and cr_returned_date_sk = d_date_sk +and cr_returning_customer_sk= c_customer_sk +and cd_demo_sk = c_current_cdemo_sk +and hd_demo_sk = c_current_hdemo_sk +and ca_address_sk = c_current_addr_sk +and d_year = 2002 +and d_moy = 11 +and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') + or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) +and hd_buy_potential like 'Unknown%' +and ca_gmt_offset = -6 +group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status +order by sum(cr_net_loss) desc; + diff --git a/crates/benchmarks/queries/tpcds/q92.sql b/crates/benchmarks/queries/tpcds/q92.sql new file mode 100644 index 0000000000..171a968627 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q92.sql @@ -0,0 +1,31 @@ +-- SQLBench-DS query 92 derived from TPC-DS query 92 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select + sum(ws_ext_discount_amt) as `Excess Discount Amount` +from + web_sales + ,item + ,date_dim +where +i_manufact_id = 914 +and i_item_sk = ws_item_sk +and d_date between '2001-01-25' and + (cast('2001-01-25' as date) + INTERVAL '90 DAYS') +and d_date_sk = ws_sold_date_sk +and ws_ext_discount_amt + > ( + SELECT + 1.3 * avg(ws_ext_discount_amt) + FROM + web_sales + ,date_dim + WHERE + ws_item_sk = i_item_sk + and d_date between '2001-01-25' and + (cast('2001-01-25' as date) + INTERVAL '90 DAYS') + and d_date_sk = ws_sold_date_sk + ) +order by sum(ws_ext_discount_amt) + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q93.sql b/crates/benchmarks/queries/tpcds/q93.sql new file mode 100644 index 0000000000..31ec9e7d4e --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q93.sql @@ -0,0 +1,19 @@ +-- SQLBench-DS query 93 derived from TPC-DS query 93 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select ss_customer_sk + ,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price + else (ss_quantity*ss_sales_price) end act_sales + from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) + ,reason + where sr_reason_sk = r_reason_sk + and r_reason_desc = 'Did not get it on time') t + group by ss_customer_sk + order by sumsales, ss_customer_sk + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q94.sql b/crates/benchmarks/queries/tpcds/q94.sql new file mode 100644 index 0000000000..cf04e14e0d --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q94.sql @@ -0,0 +1,30 @@ +-- SQLBench-DS query 94 derived from TPC-DS query 94 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. 
+-- This query was generated at scale factor 1. +select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '1999-4-01' and + (cast('1999-4-01' as date) + INTERVAL '60 DAYS') +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'WI' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and exists (select * + from web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) +and not exists(select * + from web_returns wr1 + where ws1.ws_order_number = wr1.wr_order_number) +order by count(distinct ws_order_number) + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q95.sql b/crates/benchmarks/queries/tpcds/q95.sql new file mode 100644 index 0000000000..2db3e50327 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q95.sql @@ -0,0 +1,33 @@ +-- SQLBench-DS query 95 derived from TPC-DS query 95 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+with ws_wh as +(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 + from web_sales ws1,web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '2002-5-01' and + (cast('2002-5-01' as date) + INTERVAL '60 DAYS') +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'MA' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and ws1.ws_order_number in (select ws_order_number + from ws_wh) +and ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +order by count(distinct ws_order_number) + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q96.sql b/crates/benchmarks/queries/tpcds/q96.sql new file mode 100644 index 0000000000..63c6fdbf97 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q96.sql @@ -0,0 +1,17 @@ +-- SQLBench-DS query 96 derived from TPC-DS query 96 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select count(*) +from store_sales + ,household_demographics + ,time_dim, store +where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and household_demographics.hd_dep_count = 5 + and store.s_store_name = 'ese' +order by count(*) + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q97.sql b/crates/benchmarks/queries/tpcds/q97.sql new file mode 100644 index 0000000000..5741cc9c56 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q97.sql @@ -0,0 +1,26 @@ +-- SQLBench-DS query 97 derived from TPC-DS query 97 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +with ssci as ( +select ss_customer_sk customer_sk + ,ss_item_sk item_sk +from store_sales,date_dim +where ss_sold_date_sk = d_date_sk + and d_month_seq between 1211 and 1211 + 11 +group by ss_customer_sk + ,ss_item_sk), +csci as( + select cs_bill_customer_sk customer_sk + ,cs_item_sk item_sk +from catalog_sales,date_dim +where cs_sold_date_sk = d_date_sk + and d_month_seq between 1211 and 1211 + 11 +group by cs_bill_customer_sk + ,cs_item_sk) + select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only + ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only + ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog +from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk + and ssci.item_sk = csci.item_sk) + LIMIT 100; + diff --git a/crates/benchmarks/queries/tpcds/q98.sql b/crates/benchmarks/queries/tpcds/q98.sql new file mode 100644 index 0000000000..86bf08b2a1 --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q98.sql @@ -0,0 +1,34 @@ +-- SQLBench-DS query 98 derived from TPC-DS 
query 98 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ss_ext_sales_price) as itemrevenue + ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over + (partition by i_class) as revenueratio +from + store_sales + ,item + ,date_dim +where + ss_item_sk = i_item_sk + and i_category in ('Shoes', 'Music', 'Men') + and ss_sold_date_sk = d_date_sk + and d_date between cast('2000-01-05' as date) + and (cast('2000-01-05' as date) + INTERVAL '30 DAYS') +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio; + diff --git a/crates/benchmarks/queries/tpcds/q99.sql b/crates/benchmarks/queries/tpcds/q99.sql new file mode 100644 index 0000000000..8bd1f6406d --- /dev/null +++ b/crates/benchmarks/queries/tpcds/q99.sql @@ -0,0 +1,36 @@ +-- SQLBench-DS query 99 derived from TPC-DS query 99 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and + (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and + (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and + (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + catalog_sales + ,warehouse + ,ship_mode + ,call_center + ,date_dim +where + d_month_seq between 1188 and 1188 + 11 +and cs_ship_date_sk = d_date_sk +and cs_warehouse_sk = w_warehouse_sk +and cs_ship_mode_sk = sm_ship_mode_sk +and cs_call_center_sk = cc_call_center_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name + LIMIT 100; + diff --git a/crates/benchmarks/src/lib.rs b/crates/benchmarks/src/lib.rs index 3663e19651..07a2fb71cd 100644 --- a/crates/benchmarks/src/lib.rs +++ b/crates/benchmarks/src/lib.rs @@ -1,5 +1,6 @@ pub mod merge; pub mod smoke; +pub mod tpcds_queries; pub use merge::{ delete_only_cases, insert_only_cases, merge_case_by_name, merge_case_names, merge_delete, @@ -7,3 +8,6 @@ pub use merge::{ MergePerfParams, MergeScenario, MergeTestCase, }; pub use smoke::{run_smoke_once, SmokeParams}; +pub use tpcds_queries::{ + register_tpcds_tables, tpcds_queries, tpcds_query, tpcds_query_names, TPCDS_TABLE_NAMES, +}; diff --git a/crates/benchmarks/src/main.rs b/crates/benchmarks/src/main.rs index e4e70e71f2..2a8e48a32b 100644 --- a/crates/benchmarks/src/main.rs +++ b/crates/benchmarks/src/main.rs @@ -4,7 +4,8 @@ use clap::{Parser, Subcommand, ValueEnum}; use delta_benchmarks::{ merge_case_by_name, merge_case_names, 
merge_delete, merge_insert, merge_upsert, - prepare_source_and_table, run_smoke_once, MergeOp, MergePerfParams, MergeTestCase, SmokeParams, + prepare_source_and_table, register_tpcds_tables, run_smoke_once, tpcds_queries, tpcds_query, + MergeOp, MergePerfParams, MergeTestCase, SmokeParams, }; use deltalake_core::ensure_table_uri; @@ -53,6 +54,21 @@ enum Command { #[arg(long)] table_path: Option, }, + + /// Inspect the bundled TPC-DS queries + Tpcds { + /// Query identifier to print (for example `q1`) + #[arg(long)] + case: Option<String>, + + /// List all available query identifiers + #[arg(long, conflicts_with = "case")] + list: bool, + + /// Run the query and measure execution time + #[arg(long, conflicts_with = "list", requires = "case")] + run: bool, + }, } #[tokio::main] @@ -116,6 +132,29 @@ async fn main() -> anyhow::Result<()> { table_url ); } + Command::Tpcds { case, list, run } => { + if list { + for name in tpcds_queries().keys() { + println!("{name}"); + } + } else if let Some(name) = case { + let sql = match tpcds_queries().get(name.as_str()) { + Some(sql) => sql, + None => anyhow::bail!( + "unknown TPC-DS query '{name}'. 
Available: {:?}", + tpcds_queries().keys(), + ), + }; + + if run { + run_tpcds_query(&name).await?; + } else { + println!("-- {name}\n{}", sql.trim()); + } + } else { + anyhow::bail!("specify --case or --list"); + } + } } Ok(()) @@ -166,3 +205,37 @@ async fn run_merge_case(case: &MergeTestCase) -> anyhow::Result<()> { Ok(()) } + +async fn run_tpcds_query(query_name: &str) -> anyhow::Result<()> { + let tmp_dir = tempfile::tempdir()?; + let parquet_dir = PathBuf::from( + std::env::var("TPCDS_PARQUET_DIR") + .unwrap_or_else(|_| "crates/benchmarks/data/tpcds_parquet".to_string()), + ); + + println!("Loading TPC-DS tables from {}...", parquet_dir.display()); + let setup_start = Instant::now(); + let ctx = register_tpcds_tables(&tmp_dir, &parquet_dir).await?; + let setup_elapsed = setup_start.elapsed(); + println!("Setup completed in {} ms", setup_elapsed.as_millis()); + + let sql = tpcds_query(query_name) + .ok_or_else(|| anyhow::anyhow!("query '{}' not found", query_name))?; + + println!("\nExecuting query {}...", query_name); + let start = Instant::now(); + let df = ctx.sql(sql).await?; + let batches = df.collect().await?; + let elapsed = start.elapsed(); + + let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum(); + + println!( + "query={} duration_ms={} rows={}", + query_name, + elapsed.as_millis(), + total_rows + ); + + Ok(()) +} diff --git a/crates/benchmarks/src/merge.rs b/crates/benchmarks/src/merge.rs index 666d5fe97b..3e322a57cb 100644 --- a/crates/benchmarks/src/merge.rs +++ b/crates/benchmarks/src/merge.rs @@ -377,8 +377,8 @@ pub fn merge_upsert(source: DataFrame, table: DeltaTable) -> Result Result { @@ -389,7 +389,7 @@ pub fn merge_insert(source: DataFrame, table: DeltaTable) -> Result Result { diff --git a/crates/benchmarks/src/tpcds_queries.rs b/crates/benchmarks/src/tpcds_queries.rs new file mode 100644 index 0000000000..ce02b53a9b --- /dev/null +++ b/crates/benchmarks/src/tpcds_queries.rs @@ -0,0 +1,214 @@ +use std::collections::HashMap; 
+use std::sync::LazyLock; + +use std::path::Path; +use std::sync::Arc; + +use deltalake_core::datafusion::prelude::{ParquetReadOptions, SessionContext}; +use deltalake_core::delta_datafusion::{DeltaScanConfigBuilder, DeltaTableProvider}; +use deltalake_core::kernel::engine::arrow_conversion::TryIntoKernel; +use deltalake_core::kernel::{StructField, StructType}; +use deltalake_core::{DeltaOps, DeltaResult}; +use tempfile::TempDir; +use tokio::fs::create_dir; +use url::Url; + +macro_rules! include_query { + ($path:literal) => { + include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/", $path)) + }; +} + +pub const TPCDS_TABLE_NAMES: &[&str] = &[ + "call_center", + "catalog_page", + "catalog_returns", + "catalog_sales", + "customer", + "customer_address", + "customer_demographics", + "date_dim", + "household_demographics", + "income_band", + "inventory", + "item", + "promotion", + "reason", + "ship_mode", + "store", + "store_returns", + "store_sales", + "time_dim", + "warehouse", + "web_page", + "web_returns", + "web_sales", + "web_site", +]; + +pub async fn register_tpcds_tables( + tmp_dir: &TempDir, + parquet_dir: &Path, +) -> DeltaResult<SessionContext> { + let ctx = SessionContext::new(); + for table_name in TPCDS_TABLE_NAMES { + let parquet_path = parquet_dir + .join(format!("{}.parquet", table_name)) + .to_str() + .unwrap() + .to_string(); + + let parquet_df = ctx + .read_parquet(parquet_path, ParquetReadOptions::default()) + .await?; + + let temp_table_dir = tmp_dir.path().join(table_name); + create_dir(&temp_table_dir).await?; + let temp_table_url = Url::from_directory_path(&temp_table_dir).unwrap(); + + let schema = parquet_df.schema(); + let delta_schema: StructType = schema.as_arrow().try_into_kernel().unwrap(); + + let batches = parquet_df.collect().await?; + let fields: Vec<StructField> = delta_schema.fields().cloned().collect(); + let table = DeltaOps::try_from_uri(temp_table_url) + .await? 
+ .create() + .with_columns(fields) + .await?; + + let table = DeltaOps(table).write(batches).await?; + + let snapshot = table.snapshot()?.snapshot().clone(); + let config = DeltaScanConfigBuilder::new().build(&snapshot)?; + let provider = DeltaTableProvider::try_new(snapshot, table.log_store(), config)?; + + ctx.register_table(*table_name, Arc::new(provider))?; + } + + Ok(ctx) +} + +type QueryMap = HashMap<&'static str, &'static str>; + +pub fn tpcds_queries() -> &'static QueryMap { + static HASH_MAP: LazyLock<QueryMap> = LazyLock::new(|| { + let mut map = HashMap::new(); + for (k, v) in TPCDS_QUERIES_ENTRIES { + map.insert(*k, *v); + } + map + }); + &HASH_MAP +} + +pub fn tpcds_query(name: &str) -> Option<&'static str> { + tpcds_queries().get(name).copied() +} + +pub fn tpcds_query_names() -> Vec<&'static str> { + TPCDS_QUERIES_ENTRIES.iter().map(|(k, _)| *k).collect() +} + +static TPCDS_QUERIES_ENTRIES: &[(&str, &str)] = &[ + ("q1", include_query!("queries/tpcds/q1.sql")), + ("q2", include_query!("queries/tpcds/q2.sql")), + ("q3", include_query!("queries/tpcds/q3.sql")), + ("q4", include_query!("queries/tpcds/q4.sql")), + ("q5", include_query!("queries/tpcds/q5.sql")), + ("q6", include_query!("queries/tpcds/q6.sql")), + ("q7", include_query!("queries/tpcds/q7.sql")), + ("q8", include_query!("queries/tpcds/q8.sql")), + ("q9", include_query!("queries/tpcds/q9.sql")), + ("q10", include_query!("queries/tpcds/q10.sql")), + ("q11", include_query!("queries/tpcds/q11.sql")), + ("q12", include_query!("queries/tpcds/q12.sql")), + ("q13", include_query!("queries/tpcds/q13.sql")), + ("q14", include_query!("queries/tpcds/q14.sql")), + ("q15", include_query!("queries/tpcds/q15.sql")), + ("q16", include_query!("queries/tpcds/q16.sql")), + ("q17", include_query!("queries/tpcds/q17.sql")), + ("q18", include_query!("queries/tpcds/q18.sql")), + ("q19", include_query!("queries/tpcds/q19.sql")), + ("q20", include_query!("queries/tpcds/q20.sql")), + ("q21", 
include_query!("queries/tpcds/q21.sql")), + ("q22", include_query!("queries/tpcds/q22.sql")), + ("q23", include_query!("queries/tpcds/q23.sql")), + ("q24", include_query!("queries/tpcds/q24.sql")), + ("q25", include_query!("queries/tpcds/q25.sql")), + ("q26", include_query!("queries/tpcds/q26.sql")), + ("q27", include_query!("queries/tpcds/q27.sql")), + ("q28", include_query!("queries/tpcds/q28.sql")), + ("q29", include_query!("queries/tpcds/q29.sql")), + ("q30", include_query!("queries/tpcds/q30.sql")), + ("q31", include_query!("queries/tpcds/q31.sql")), + ("q32", include_query!("queries/tpcds/q32.sql")), + ("q33", include_query!("queries/tpcds/q33.sql")), + ("q34", include_query!("queries/tpcds/q34.sql")), + ("q35", include_query!("queries/tpcds/q35.sql")), + ("q36", include_query!("queries/tpcds/q36.sql")), + ("q37", include_query!("queries/tpcds/q37.sql")), + ("q38", include_query!("queries/tpcds/q38.sql")), + ("q39", include_query!("queries/tpcds/q39.sql")), + ("q40", include_query!("queries/tpcds/q40.sql")), + ("q41", include_query!("queries/tpcds/q41.sql")), + ("q42", include_query!("queries/tpcds/q42.sql")), + ("q43", include_query!("queries/tpcds/q43.sql")), + ("q44", include_query!("queries/tpcds/q44.sql")), + ("q45", include_query!("queries/tpcds/q45.sql")), + ("q46", include_query!("queries/tpcds/q46.sql")), + ("q47", include_query!("queries/tpcds/q47.sql")), + ("q48", include_query!("queries/tpcds/q48.sql")), + ("q49", include_query!("queries/tpcds/q49.sql")), + ("q50", include_query!("queries/tpcds/q50.sql")), + ("q51", include_query!("queries/tpcds/q51.sql")), + ("q52", include_query!("queries/tpcds/q52.sql")), + ("q53", include_query!("queries/tpcds/q53.sql")), + ("q54", include_query!("queries/tpcds/q54.sql")), + ("q55", include_query!("queries/tpcds/q55.sql")), + ("q56", include_query!("queries/tpcds/q56.sql")), + ("q57", include_query!("queries/tpcds/q57.sql")), + ("q58", include_query!("queries/tpcds/q58.sql")), + ("q59", 
include_query!("queries/tpcds/q59.sql")), + ("q60", include_query!("queries/tpcds/q60.sql")), + ("q61", include_query!("queries/tpcds/q61.sql")), + ("q62", include_query!("queries/tpcds/q62.sql")), + ("q63", include_query!("queries/tpcds/q63.sql")), + ("q64", include_query!("queries/tpcds/q64.sql")), + ("q65", include_query!("queries/tpcds/q65.sql")), + ("q66", include_query!("queries/tpcds/q66.sql")), + ("q67", include_query!("queries/tpcds/q67.sql")), + ("q68", include_query!("queries/tpcds/q68.sql")), + ("q69", include_query!("queries/tpcds/q69.sql")), + ("q70", include_query!("queries/tpcds/q70.sql")), + ("q71", include_query!("queries/tpcds/q71.sql")), + // disabled due to upstream datafusion: https://github.com/apache/datafusion/issues/4763 + // ("q72", include_query!("queries/tpcds/q72.sql")), + ("q73", include_query!("queries/tpcds/q73.sql")), + ("q74", include_query!("queries/tpcds/q74.sql")), + ("q75", include_query!("queries/tpcds/q75.sql")), + ("q76", include_query!("queries/tpcds/q76.sql")), + ("q77", include_query!("queries/tpcds/q77.sql")), + ("q78", include_query!("queries/tpcds/q78.sql")), + ("q79", include_query!("queries/tpcds/q79.sql")), + ("q80", include_query!("queries/tpcds/q80.sql")), + ("q81", include_query!("queries/tpcds/q81.sql")), + ("q82", include_query!("queries/tpcds/q82.sql")), + ("q83", include_query!("queries/tpcds/q83.sql")), + ("q84", include_query!("queries/tpcds/q84.sql")), + ("q85", include_query!("queries/tpcds/q85.sql")), + ("q86", include_query!("queries/tpcds/q86.sql")), + ("q87", include_query!("queries/tpcds/q87.sql")), + ("q88", include_query!("queries/tpcds/q88.sql")), + ("q89", include_query!("queries/tpcds/q89.sql")), + ("q90", include_query!("queries/tpcds/q90.sql")), + ("q91", include_query!("queries/tpcds/q91.sql")), + ("q92", include_query!("queries/tpcds/q92.sql")), + ("q93", include_query!("queries/tpcds/q93.sql")), + ("q94", include_query!("queries/tpcds/q94.sql")), + ("q95", 
include_query!("queries/tpcds/q95.sql")), + ("q96", include_query!("queries/tpcds/q96.sql")), + ("q97", include_query!("queries/tpcds/q97.sql")), + ("q98", include_query!("queries/tpcds/q98.sql")), + ("q99", include_query!("queries/tpcds/q99.sql")), +]; From 069d48ee2f2a140b895d64cf6e0467b191b2423e Mon Sep 17 00:00:00 2001 From: Abhi Agarwal Date: Mon, 13 Oct 2025 22:20:45 -0400 Subject: [PATCH 4/5] Cleanup Signed-off-by: Abhi Agarwal --- crates/benchmarks/benches/merge.rs | 14 ++++----- crates/benchmarks/src/main.rs | 48 ++++++++++++++++-------------- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/crates/benchmarks/benches/merge.rs b/crates/benchmarks/benches/merge.rs index be72720f86..1cd7857047 100644 --- a/crates/benchmarks/benches/merge.rs +++ b/crates/benchmarks/benches/merge.rs @@ -15,9 +15,6 @@ static ALLOC: AllocProfiler = AllocProfiler::system(); fn bench_merge_case(bencher: Bencher, case: &MergeTestCase) { let rt = tokio::runtime::Runtime::new().unwrap(); - let case_copy = *case; - let params = case_copy.params; - bencher .with_inputs(|| { let tmp_dir = tempfile::tempdir().unwrap(); @@ -26,17 +23,16 @@ fn bench_merge_case(bencher: Bencher, case: &MergeTestCase) { .unwrap_or_else(|_| "data/tpcds_parquet".to_string()), ); rt.block_on(async move { - let (source, table) = prepare_source_and_table(&params, &tmp_dir, &parquet_dir) - .await - .expect("prepare inputs"); + let (source, table) = + prepare_source_and_table(&case.params, &tmp_dir, &parquet_dir) + .await + .expect("prepare inputs"); (case, source, table, tmp_dir) }) }) .bench_local_values(|(case, source, table, tmp_dir)| { rt.block_on(async move { - let (_, metrics) = case.execute(source, table).await.expect("execute merge"); - case.validate(&metrics).expect("validate merge"); - divan::black_box(metrics.num_target_rows_inserted); + divan::black_box(case.execute(source, table).await.expect("execute merge")); }); drop(tmp_dir); }); diff --git a/crates/benchmarks/src/main.rs 
b/crates/benchmarks/src/main.rs index 2a8e48a32b..256130e7e3 100644 --- a/crates/benchmarks/src/main.rs +++ b/crates/benchmarks/src/main.rs @@ -1,4 +1,7 @@ -use std::{path::PathBuf, time::Instant}; +use std::{ + path::{Path, PathBuf}, + time::Instant, +}; use clap::{Parser, Subcommand, ValueEnum}; @@ -21,6 +24,14 @@ enum OpKind { struct Cli { #[command(subcommand)] command: Command, + + /// Path to the parquet directory + #[arg( + long, + env = "TPCDS_PARQUET_DIR", + default_value = "crates/benchmarks/data/tpcds_parquet" + )] + parquet_dir: PathBuf, } #[derive(Debug, Subcommand)] @@ -75,6 +86,8 @@ enum Command { async fn main() -> anyhow::Result<()> { let cli = Cli::parse(); + let parquet_dir = cli.parquet_dir; + match cli.command { Command::Merge { op, @@ -91,7 +104,7 @@ async fn main() -> anyhow::Result<()> { ) })?; - run_merge_case(merge_case).await?; + run_merge_case(merge_case, &parquet_dir).await?; } else { let op = op.ok_or_else(|| { anyhow::anyhow!("specify an operation (upsert/delete/insert) or provide --case") @@ -108,7 +121,7 @@ async fn main() -> anyhow::Result<()> { sample_not_matched_rows: not_matched, }; - run_merge_with_params(op_fn, &params).await?; + run_merge_with_params(op_fn, &params, &parquet_dir).await?; } } Command::Smoke { rows, table_path } => { @@ -147,7 +160,7 @@ async fn main() -> anyhow::Result<()> { }; if run { - run_tpcds_query(&name).await?; + run_tpcds_query(&name, &parquet_dir).await?; } else { println!("-- {name}\n{}", sql.trim()); } @@ -160,14 +173,14 @@ async fn main() -> anyhow::Result<()> { Ok(()) } -async fn run_merge_with_params(op_fn: MergeOp, params: &MergePerfParams) -> anyhow::Result<()> { +async fn run_merge_with_params( + op_fn: MergeOp, + params: &MergePerfParams, + parquet_dir: &Path, +) -> anyhow::Result<()> { let tmp_dir = tempfile::tempdir()?; - let parquet_dir = PathBuf::from( - std::env::var("TPCDS_PARQUET_DIR") - .unwrap_or_else(|_| "crates/benchmarks/data/tpcds_parquet".to_string()), - ); - let (source, table) = 
prepare_source_and_table(params, &tmp_dir, &parquet_dir).await?; + let (source, table) = prepare_source_and_table(params, &tmp_dir, parquet_dir).await?; let start = Instant::now(); let (_table, metrics) = op_fn(source, table)?.await?; @@ -182,13 +195,8 @@ async fn run_merge_with_params(op_fn: MergeOp, params: &MergePerfParams) -> anyh Ok(()) } -async fn run_merge_case(case: &MergeTestCase) -> anyhow::Result<()> { +async fn run_merge_case(case: &MergeTestCase, parquet_dir: &Path) -> anyhow::Result<()> { let tmp_dir = tempfile::tempdir()?; - let parquet_dir = PathBuf::from( - std::env::var("TPCDS_PARQUET_DIR") - .unwrap_or_else(|_| "crates/benchmarks/data/tpcds_parquet".to_string()), - ); - let (source, table) = prepare_source_and_table(&case.params, &tmp_dir, &parquet_dir).await?; let start = Instant::now(); @@ -206,16 +214,12 @@ async fn run_merge_case(case: &MergeTestCase) -> anyhow::Result<()> { Ok(()) } -async fn run_tpcds_query(query_name: &str) -> anyhow::Result<()> { +async fn run_tpcds_query(query_name: &str, parquet_dir: &Path) -> anyhow::Result<()> { let tmp_dir = tempfile::tempdir()?; - let parquet_dir = PathBuf::from( - std::env::var("TPCDS_PARQUET_DIR") - .unwrap_or_else(|_| "crates/benchmarks/data/tpcds_parquet".to_string()), - ); println!("Loading TPC-DS tables from {}...", parquet_dir.display()); let setup_start = Instant::now(); - let ctx = register_tpcds_tables(&tmp_dir, &parquet_dir).await?; + let ctx = register_tpcds_tables(&tmp_dir, parquet_dir).await?; let setup_elapsed = setup_start.elapsed(); println!("Setup completed in {} ms", setup_elapsed.as_millis()); From 82e2b8bd0cef501237a62f5daf25431ffb25d588 Mon Sep 17 00:00:00 2001 From: Abhi Agarwal Date: Sat, 18 Oct 2025 19:24:26 -0400 Subject: [PATCH 5/5] Enable clap `env` feature and update README Signed-off-by: Abhi Agarwal --- crates/benchmarks/Cargo.toml | 2 +- crates/benchmarks/README.md | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/crates/benchmarks/Cargo.toml 
b/crates/benchmarks/Cargo.toml index f8b99a7037..7b198c5088 100644 --- a/crates/benchmarks/Cargo.toml +++ b/crates/benchmarks/Cargo.toml @@ -10,7 +10,7 @@ edition = "2021" publish = false [dependencies] -clap = { version = "4", features = ["derive"] } +clap = { version = "4", features = ["derive", "env"] } tokio = { workspace = true, features = ["fs", "macros", "rt", "io-util"] } url = { workspace = true } tempfile = { workspace = true } diff --git a/crates/benchmarks/README.md b/crates/benchmarks/README.md index 506730ddab..0ce31bc469 100644 --- a/crates/benchmarks/README.md +++ b/crates/benchmarks/README.md @@ -1,4 +1,5 @@ -# Merge +# Benchmarks + The merge benchmarks are similar to the ones used by [Delta Spark](https://github.com/delta-io/delta/pull/1835).