From 79845ce15fa5d31e572126b15b1764cb00b02e9f Mon Sep 17 00:00:00 2001 From: Sergei Grebnov Date: Tue, 19 May 2026 18:18:04 +0300 Subject: [PATCH 1/5] =?UTF-8?q?feat(CH-BenCHmark):=20rename=20orders?= =?UTF-8?q?=E2=86=92oorder,=20add=20SF10/100/1000=20dispatch=20configurati?= =?UTF-8?q?ons=20(#10921)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * orders->oorder, add additional scale factors, update cdc-tuned * Improve test results output * Add more scale factors * Add dedicated dispatch for htap * Improve * Fix lint * More lint fixes * More lint fixes * Fix compaction_collapses_tiny_protected_snapshots * Apply suggestion from @lukekim --------- Co-authored-by: Luke Kim <80174+lukekim@users.noreply.github.com> --- .../workflows/testoperator_dispatch_htap.yml | 82 +++++++ .../tests/small_files_compaction_test.rs | 1 + crates/runtime/src/dataaccelerator/duckdb.rs | 1 - .../src/dataaccelerator/partitioned_duckdb.rs | 1 - crates/runtime/src/datafusion/builder.rs | 6 +- .../src/queries/chbench/q10.sql | 2 +- .../src/queries/chbench/q12.sql | 2 +- .../src/queries/chbench/q13.sql | 2 +- .../src/queries/chbench/q18.sql | 2 +- .../src/queries/chbench/q21.sql | 2 +- .../src/queries/chbench/q22.sql | 2 +- .../test-framework/src/queries/chbench/q3.sql | 2 +- .../test-framework/src/queries/chbench/q4.sql | 2 +- .../test-framework/src/queries/chbench/q5.sql | 2 +- .../test-framework/src/queries/chbench/q7.sql | 2 +- .../test-framework/src/queries/chbench/q8.sql | 2 +- .../test-framework/src/queries/chbench/q9.sql | 2 +- .../chbench/accelerated/postgres-arrow.yaml | 6 +- .../postgres-cayenne[file]-cdc-tuned.yaml | 226 ++++++++++++------ .../accelerated/postgres-cayenne[file].yaml | 6 +- .../accelerated/postgres-duckdb[file].yaml | 6 +- .../spicepods/chbench/federated/postgres.yaml | 4 +- tools/chbench-driver/src/loader.rs | 8 +- tools/chbench-driver/src/schema.rs | 12 +- tools/chbench-driver/src/txn/delivery.rs | 8 +- tools/chbench-driver/src/txn/new_order.rs | 6 +- tools/chbench-driver/src/txn/order_status.rs | 2 +- tools/chbench-driver/src/txn/prepared.rs | 10 +- .../dispatch/chbench/sf1/postgres-arrow.yaml | 2 +- .../sf1/postgres-cayenne[file]-cdc-tuned.yaml | 2 +- .../chbench/sf1/postgres-cayenne[file].yaml | 2 +- .../chbench/sf1/postgres-duckdb[file].yaml | 2 +- .../dispatch/chbench/sf10/postgres-arrow.yaml | 7 + .../postgres-cayenne[file]-cdc-tuned.yaml | 7 + .../chbench/sf10/postgres-cayenne[file].yaml | 7 + .../chbench/sf10/postgres-duckdb[file].yaml | 7 + .../postgres-cayenne[file]-cdc-tuned.yaml | 8 + .../chbench/sf100/postgres-cayenne[file].yaml | 8 + .../chbench/sf100/postgres-duckdb[file].yaml | 8 + .../postgres-cayenne[file]-cdc-tuned.yaml | 8 + .../chbench/sf1000/postgres-duckdb[file].yaml | 8 + tools/testoperator/src/commands/htap/mod.rs | 1 + .../src/commands/htap/staleness.rs | 9 +- 43 files changed, 359 insertions(+), 136 deletions(-) create mode 100644 .github/workflows/testoperator_dispatch_htap.yml create mode 100644 tools/testoperator/dispatch/chbench/sf10/postgres-arrow.yaml create mode 100644 tools/testoperator/dispatch/chbench/sf10/postgres-cayenne[file]-cdc-tuned.yaml create mode 100644 tools/testoperator/dispatch/chbench/sf10/postgres-cayenne[file].yaml create mode 100644 tools/testoperator/dispatch/chbench/sf10/postgres-duckdb[file].yaml create mode 100644 tools/testoperator/dispatch/chbench/sf100/postgres-cayenne[file]-cdc-tuned.yaml create mode 100644 tools/testoperator/dispatch/chbench/sf100/postgres-cayenne[file].yaml create mode 100644 tools/testoperator/dispatch/chbench/sf100/postgres-duckdb[file].yaml create mode 100644 tools/testoperator/dispatch/chbench/sf1000/postgres-cayenne[file]-cdc-tuned.yaml create mode 100644 tools/testoperator/dispatch/chbench/sf1000/postgres-duckdb[file].yaml diff --git a/.github/workflows/testoperator_dispatch_htap.yml b/.github/workflows/testoperator_dispatch_htap.yml new file mode 100644 index 0000000000..f35b59c85a --- /dev/null +++ b/.github/workflows/testoperator_dispatch_htap.yml @@ -0,0 +1,82 @@ +name: testoperator dispatch htap + +on: + workflow_dispatch: + inputs: + spiced_commit: + description: 'An optional commit hash to use for spiced' + required: false + type: string + scale_factor: + description: 'Scale factor to run' + required: true + type: choice + options: + - 'sf1' + - 'sf10' + - 'sf100' + - 'sf1000' + +jobs: + dispatch-htap: + name: Dispatch HTAP - ${{ github.event.inputs.scale_factor }} + runs-on: spiceai-dev-runners + concurrency: + group: testoperator-dispatch-htap-${{ github.event.ref }} + cancel-in-progress: false + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false + + - name: Install MinIO + uses: ./.github/actions/setup-minio + with: + minio_endpoint: ${{ secrets.TEST_MINIO_ENDPOINT }} + minio_access_key: ${{ secrets.TEST_MINIO_ACCESS_KEY }} + minio_secret_key: ${{ secrets.TEST_MINIO_SECRET_KEY }} + + - name: Setup spiced + uses: ./.github/actions/setup-spiced + id: setup-spiced + with: + spiced_commit: ${{ github.event.inputs.spiced_commit }} + ref: ${{ github.event.ref }} + + - name: Display spiced commit + run: echo "SPICED_COMMIT=${{ steps.setup-spiced.outputs.SPICED_COMMIT }}" + + - name: Build Testoperator + uses: ./.github/actions/build-testoperator + with: + minio_endpoint: ${{ secrets.TEST_MINIO_ENDPOINT }} + minio_access_key: ${{ secrets.TEST_MINIO_ACCESS_KEY }} + minio_secret_key: ${{ secrets.TEST_MINIO_SECRET_KEY }} + + - name: Build spicepod validator + id: build-spicepod-validator + uses: ./.github/actions/build-spicepod-validator + with: + minio_endpoint: ${{ secrets.TEST_MINIO_ENDPOINT }} + minio_access_key: ${{ secrets.TEST_MINIO_ACCESS_KEY }} + minio_secret_key: ${{ secrets.TEST_MINIO_SECRET_KEY }} + + - name: Set spicepod validator path + run: echo "SPICEPOD_VALIDATOR=${{ steps.build-spicepod-validator.outputs.validator-path }}" >> $GITHUB_ENV + + - name: Validate spicepods - CH-BenCHmark + run: | + shopt -s globstar nullglob + for file in ./test/spicepods/chbench/**/*.yaml; do + echo "Validating $file" + "$SPICEPOD_VALIDATOR" "$file" + done + + - name: Dispatch Testoperator - HTAP - CH-BenCHmark - ${{ github.event.inputs.scale_factor }} + run: | + testoperator dispatch ./tools/testoperator/dispatch/chbench/${{ github.event.inputs.scale_factor }} \ + --workflow htap + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + SPICED_COMMIT: ${{ steps.setup-spiced.outputs.SPICED_COMMIT }} + WORKFLOW_COMMIT: ${{ github.event.ref }} diff --git a/crates/cayenne/tests/small_files_compaction_test.rs b/crates/cayenne/tests/small_files_compaction_test.rs index 67f23f676b..c072e1fe82 100644 --- a/crates/cayenne/tests/small_files_compaction_test.rs +++ b/crates/cayenne/tests/small_files_compaction_test.rs @@ -408,6 +408,7 @@ async fn compaction_collapses_tiny_protected_snapshots( let config = VortexConfig { target_vortex_file_size_mb: 128, compaction_trigger_files: 4, + compaction_trigger_protected_snapshots: 4, compaction_background_interval_ms: 0, ..Default::default() }; diff --git a/crates/runtime/src/dataaccelerator/duckdb.rs b/crates/runtime/src/dataaccelerator/duckdb.rs index 07beb48ba7..0db94086b8 100644 --- a/crates/runtime/src/dataaccelerator/duckdb.rs +++ b/crates/runtime/src/dataaccelerator/duckdb.rs @@ -96,7 +96,6 @@ pub(crate) fn create_factory() -> DuckDBTableProviderFactory { .with_function_support(deny_spice_functions_for_duckdb().as_ref().clone()) } -pub(crate) const DEFAULT_MIN_IDLE_CONNECTIONS: u32 = 10; pub(crate) const DEFAULT_CONNECTION_POOL_SIZE: u32 = 10; pub(crate) const DEFAULT_EBS_CONNECTION_POOL_SIZE: u32 = 4; pub(crate) const SPICE_ACCELERATOR_METADATA_KEY: &str = "spice.accelerator"; diff --git a/crates/runtime/src/dataaccelerator/partitioned_duckdb.rs b/crates/runtime/src/dataaccelerator/partitioned_duckdb.rs index f300ed1cf2..f34f2d852e 100644 --- a/crates/runtime/src/dataaccelerator/partitioned_duckdb.rs +++ b/crates/runtime/src/dataaccelerator/partitioned_duckdb.rs @@ -58,7 +58,6 @@ use super::{ use crate::{ component::dataset::acceleration::{Engine, Mode}, dataaccelerator::{FilePathError, storage::resolve_acceleration_storage_async}, - datafusion::{dialect::new_duckdb_dialect, udf::deny_spice_functions_for_duckdb}, parameters::ParameterSpec, register_data_accelerator, spice_data_base_path, }; diff --git a/crates/runtime/src/datafusion/builder.rs b/crates/runtime/src/datafusion/builder.rs index 619150eed5..966279e5ac 100644 --- a/crates/runtime/src/datafusion/builder.rs +++ b/crates/runtime/src/datafusion/builder.rs @@ -1132,12 +1132,12 @@ mod tests { .build(); let state = df.ctx.state(); - let rule_names: Vec<&str> = state.optimizers().iter().map(|r| r.name()).collect(); assert!( - !rule_names + !state + .optimizers() .iter() - .any(|name| *name == "cayenne_propagate_filter_across_equi_join_keys"), + .any(|r| r.name() == "cayenne_propagate_filter_across_equi_join_keys"), "Cayenne logical filter propagation should be disabled by default" ); } diff --git a/crates/test-framework/src/queries/chbench/q10.sql b/crates/test-framework/src/queries/chbench/q10.sql index c6237a1200..e319185caa 100644 --- a/crates/test-framework/src/queries/chbench/q10.sql +++ b/crates/test-framework/src/queries/chbench/q10.sql @@ -6,7 +6,7 @@ SELECT c_phone, n_name FROM - customer, orders, order_line, nation + customer, oorder, order_line, nation WHERE c_id = o_c_id AND c_w_id = o_w_id diff --git a/crates/test-framework/src/queries/chbench/q12.sql b/crates/test-framework/src/queries/chbench/q12.sql index 31cc5b9bcb..f6304970c7 100644 --- a/crates/test-framework/src/queries/chbench/q12.sql +++ b/crates/test-framework/src/queries/chbench/q12.sql @@ -3,7 +3,7 @@ SELECT sum(CASE WHEN o_carrier_id = 1 OR o_carrier_id = 2 THEN 1 ELSE 0 END) AS high_line_count, sum(CASE WHEN o_carrier_id <> 1 AND o_carrier_id <> 2 THEN 1 ELSE 0 END) AS low_line_count FROM - orders, order_line + oorder, order_line WHERE ol_w_id = o_w_id AND ol_d_id = o_d_id diff --git a/crates/test-framework/src/queries/chbench/q13.sql b/crates/test-framework/src/queries/chbench/q13.sql index 7934212802..f5039ab835 100644 --- a/crates/test-framework/src/queries/chbench/q13.sql +++ b/crates/test-framework/src/queries/chbench/q13.sql @@ -2,7 +2,7 @@ SELECT c_count, count(*) AS custdist FROM (SELECT c_id, count(o_id) AS c_count - FROM customer LEFT OUTER JOIN orders ON ( + FROM customer LEFT OUTER JOIN oorder ON ( c_w_id = o_w_id AND c_d_id = o_d_id AND c_id = o_c_id diff --git a/crates/test-framework/src/queries/chbench/q18.sql b/crates/test-framework/src/queries/chbench/q18.sql index ff3ffa7d62..e5654f3350 100644 --- a/crates/test-framework/src/queries/chbench/q18.sql +++ b/crates/test-framework/src/queries/chbench/q18.sql @@ -6,7 +6,7 @@ SELECT o_ol_cnt, sum(ol_amount) AS amount_sum FROM - customer, orders, order_line + customer, oorder, order_line WHERE c_id = o_c_id AND c_w_id = o_w_id diff --git a/crates/test-framework/src/queries/chbench/q21.sql b/crates/test-framework/src/queries/chbench/q21.sql index 3ed03f0dbc..ee79c1565a 100644 --- a/crates/test-framework/src/queries/chbench/q21.sql +++ b/crates/test-framework/src/queries/chbench/q21.sql @@ -3,7 +3,7 @@ SELECT FROM supplier, order_line l1, - orders, + oorder, stock, nation WHERE diff --git a/crates/test-framework/src/queries/chbench/q22.sql b/crates/test-framework/src/queries/chbench/q22.sql index db6dad2c79..9b7b871f47 100644 --- a/crates/test-framework/src/queries/chbench/q22.sql +++ b/crates/test-framework/src/queries/chbench/q22.sql @@ -9,7 +9,7 @@ WHERE AND c_balance > (SELECT avg(c_balance) FROM customer WHERE c_balance > 0.00 AND substr(c_phone,1,1) IN ('1','2','3','4','5','6','7')) - AND NOT EXISTS (SELECT * FROM orders + AND NOT EXISTS (SELECT * FROM oorder WHERE o_c_id = c_id AND o_w_id = c_w_id AND o_d_id = c_d_id) diff --git a/crates/test-framework/src/queries/chbench/q3.sql b/crates/test-framework/src/queries/chbench/q3.sql index fcdd2c5877..c7f57cc1bf 100644 --- a/crates/test-framework/src/queries/chbench/q3.sql +++ b/crates/test-framework/src/queries/chbench/q3.sql @@ -7,7 +7,7 @@ SELECT FROM customer, new_order, - orders, + oorder, order_line WHERE c_state LIKE 'A%' AND c_id = o_c_id diff --git a/crates/test-framework/src/queries/chbench/q4.sql b/crates/test-framework/src/queries/chbench/q4.sql index a0313f8703..1241336a61 100644 --- a/crates/test-framework/src/queries/chbench/q4.sql +++ b/crates/test-framework/src/queries/chbench/q4.sql @@ -2,7 +2,7 @@ SELECT o_ol_cnt, count(*) as order_count FROM - orders + oorder WHERE exists (SELECT * FROM order_line WHERE o_id = ol_o_id diff --git a/crates/test-framework/src/queries/chbench/q5.sql b/crates/test-framework/src/queries/chbench/q5.sql index 23e0bc55a5..3c002db9c7 100644 --- a/crates/test-framework/src/queries/chbench/q5.sql +++ b/crates/test-framework/src/queries/chbench/q5.sql @@ -3,7 +3,7 @@ SELECT sum(ol_amount) AS revenue FROM customer, - orders, + oorder, order_line, stock, supplier, diff --git a/crates/test-framework/src/queries/chbench/q7.sql b/crates/test-framework/src/queries/chbench/q7.sql index c658a5b5f6..cf6f652b42 100644 --- a/crates/test-framework/src/queries/chbench/q7.sql +++ b/crates/test-framework/src/queries/chbench/q7.sql @@ -4,7 +4,7 @@ SELECT extract(year FROM o_entry_d) AS l_year, sum(ol_amount) AS revenue FROM - supplier, stock, order_line, orders, customer, nation n1, nation n2 + supplier, stock, order_line, oorder, customer, nation n1, nation n2 WHERE ol_supply_w_id = s_w_id AND ol_i_id = s_i_id diff --git a/crates/test-framework/src/queries/chbench/q8.sql b/crates/test-framework/src/queries/chbench/q8.sql index 3924859d17..9b9d46a5ca 100644 --- a/crates/test-framework/src/queries/chbench/q8.sql +++ b/crates/test-framework/src/queries/chbench/q8.sql @@ -2,7 +2,7 @@ SELECT extract(year FROM o_entry_d) AS l_year, sum(CASE WHEN n2.n_name = 'INDIA' THEN ol_amount ELSE 0 END) / sum(ol_amount) AS mkt_share FROM - item, supplier, stock, order_line, orders, customer, nation n1, nation n2, region + item, supplier, stock, order_line, oorder, customer, nation n1, nation n2, region WHERE i_id = s_i_id AND ol_i_id = s_i_id diff --git a/crates/test-framework/src/queries/chbench/q9.sql b/crates/test-framework/src/queries/chbench/q9.sql index f88daa3acd..00bc553b42 100644 --- a/crates/test-framework/src/queries/chbench/q9.sql +++ b/crates/test-framework/src/queries/chbench/q9.sql @@ -2,7 +2,7 @@ SELECT n_name, extract(year FROM o_entry_d) AS l_year, sum(ol_amount) AS sum_profit FROM - item, stock, supplier, order_line, orders, nation + item, stock, supplier, order_line, oorder, nation WHERE ol_i_id = s_i_id AND ol_supply_w_id = s_w_id diff --git a/test/spicepods/chbench/accelerated/postgres-arrow.yaml b/test/spicepods/chbench/accelerated/postgres-arrow.yaml index c47aee6291..110095d0b1 100644 --- a/test/spicepods/chbench/accelerated/postgres-arrow.yaml +++ b/test/spicepods/chbench/accelerated/postgres-arrow.yaml @@ -57,11 +57,11 @@ datasets: on_conflict: (no_w_id, no_d_id, no_o_id): upsert - - from: postgres:orders - name: orders + - from: postgres:oorder + name: oorder params: <<: *postgres_params - pg_replication_slot: spice_orders + pg_replication_slot: spice_oorder acceleration: <<: *arrow_accel primary_key: (o_w_id, o_d_id, o_id) diff --git a/test/spicepods/chbench/accelerated/postgres-cayenne[file]-cdc-tuned.yaml b/test/spicepods/chbench/accelerated/postgres-cayenne[file]-cdc-tuned.yaml index aa2ac2889c..f3329d2569 100644 --- a/test/spicepods/chbench/accelerated/postgres-cayenne[file]-cdc-tuned.yaml +++ b/test/spicepods/chbench/accelerated/postgres-cayenne[file]-cdc-tuned.yaml @@ -2,175 +2,245 @@ version: v1 kind: Spicepod name: postgres-cayenne[file]-cdc-tuned - runtime: params: - cdc_prefetch_buffer: "1024" - cdc_max_coalesced_envelopes: "1024" - cdc_max_coalesced_bytes: "268435456" # 256 MB + cdc_prefetch_buffer: '1024' + cdc_max_coalesced_envelopes: '1024' + cdc_max_coalesced_bytes: '67108864' # 64 MB + cdc_max_coalesce_age_ms: '2000' + cdc_commit_timeout_ms: '60000' + cayenne_sort_merge_min_rows: '50000000' + query: + memory_limit: 40GiB + target_partitions: 32 datasets: - # ---- TPC-C core tables (mutated by OLTP workload) ---- + # ------------------------------------------------------------------ + # High-volume PK upsert tables. + # ------------------------------------------------------------------ - - from: postgres:warehouse - name: warehouse - params: &postgres_params + - from: postgres:district + name: district + params: &pg_base pg_host: 127.0.0.1 - pg_port: 5432 + pg_port: '5432' pg_db: chbench pg_user: bench pg_pass: bench pg_sslmode: disable - pg_replication_slot: spice_warehouse - pg_replication_initial_snapshot: "true" - acceleration: &cayenne_accel + pg_replication_slot: spice_district + pg_replication_initial_snapshot: 'true' + pg_replication_bootstrap_batch_size: '1048576' + acceleration: enabled: true engine: cayenne mode: file refresh_mode: changes - primary_key: w_id - on_conflict: - w_id: upsert - params: - cayenne_write_concurrency: "4" - cayenne_upload_concurrency: "4" - - - from: postgres:district - name: district - params: - <<: *postgres_params - pg_replication_slot: spice_district - acceleration: - <<: *cayenne_accel primary_key: (d_w_id, d_id) on_conflict: (d_w_id, d_id): upsert - params: - cayenne_write_concurrency: "4" - cayenne_upload_concurrency: "4" + params: &accel_high_volume + cayenne_metastore: sqlite + cayenne_file_path: ./spice/cayenne/district + cayenne_write_concurrency: '16' + cayenne_upload_concurrency: '32' + cayenne_footer_cache_mb: '256' + cayenne_segment_cache_mb: '1024' + cayenne_compaction_trigger_protected_snapshots: '4' + cayenne_compaction_trigger_snapshot_age_ms: '15000' + cayenne_compaction_max_files_per_pick: '64' + cayenne_compaction_background_interval_ms: '3000' - from: postgres:customer name: customer params: - <<: *postgres_params + <<: *pg_base pg_replication_slot: spice_customer acceleration: - <<: *cayenne_accel + enabled: true + engine: cayenne + mode: file + refresh_mode: changes primary_key: (c_w_id, c_d_id, c_id) on_conflict: (c_w_id, c_d_id, c_id): upsert params: - cayenne_write_concurrency: "4" - cayenne_upload_concurrency: "4" + <<: *accel_high_volume + cayenne_file_path: ./spice/cayenne/customer + cayenne_target_file_size_mb: '256' - from: postgres:new_order name: new_order params: - <<: *postgres_params + <<: *pg_base pg_replication_slot: spice_new_order acceleration: - <<: *cayenne_accel + enabled: true + engine: cayenne + mode: file + refresh_mode: changes primary_key: (no_w_id, no_d_id, no_o_id) on_conflict: (no_w_id, no_d_id, no_o_id): upsert params: - cayenne_write_concurrency: "4" - cayenne_upload_concurrency: "4" + <<: *accel_high_volume + cayenne_file_path: ./spice/cayenne/new_order - - from: postgres:orders - name: orders + - from: postgres:oorder + name: oorder params: - <<: *postgres_params - pg_replication_slot: spice_orders + <<: *pg_base + pg_replication_slot: spice_oorder acceleration: - <<: *cayenne_accel + enabled: true + engine: cayenne + mode: file + refresh_mode: changes primary_key: (o_w_id, o_d_id, o_id) on_conflict: (o_w_id, o_d_id, o_id): upsert params: - cayenne_write_concurrency: "4" - cayenne_upload_concurrency: "4" + <<: *accel_high_volume + cayenne_file_path: ./spice/cayenne/oorder - from: postgres:order_line name: order_line params: - <<: *postgres_params + <<: *pg_base pg_replication_slot: spice_order_line acceleration: - <<: *cayenne_accel + enabled: true + engine: cayenne + mode: file + refresh_mode: changes primary_key: (ol_w_id, ol_d_id, ol_o_id, ol_number) on_conflict: (ol_w_id, ol_d_id, ol_o_id, ol_number): upsert params: - cayenne_write_concurrency: "4" - cayenne_upload_concurrency: "4" + <<: *accel_high_volume + cayenne_file_path: ./spice/cayenne/order_line + cayenne_write_concurrency: '32' + cayenne_target_file_size_mb: '256' + cayenne_compaction_trigger_files: '8' + cayenne_compaction_trigger_snapshot_age_ms: '30000' + cayenne_compaction_max_files_per_pick: '128' + cayenne_segment_cache_mb: '2048' + + - from: postgres:stock + name: stock + params: + <<: *pg_base + pg_replication_slot: spice_stock + acceleration: + enabled: true + engine: cayenne + mode: file + refresh_mode: changes + primary_key: (s_w_id, s_i_id) + on_conflict: + (s_w_id, s_i_id): upsert + params: + <<: *accel_high_volume + cayenne_file_path: ./spice/cayenne/stock + cayenne_target_file_size_mb: '256' + + # ------------------------------------------------------------------ + # Low-volume reference tables. + # ------------------------------------------------------------------ + + - from: postgres:warehouse + name: warehouse + params: + <<: *pg_base + pg_replication_slot: spice_warehouse + acceleration: + enabled: true + engine: cayenne + mode: file + refresh_mode: changes + primary_key: w_id + on_conflict: + w_id: upsert + params: &accel_medium + cayenne_metastore: sqlite + cayenne_file_path: ./spice/cayenne/warehouse + cayenne_write_concurrency: '4' + cayenne_upload_concurrency: '4' + cayenne_footer_cache_mb: '32' + cayenne_segment_cache_mb: '64' - from: postgres:item name: item params: - <<: *postgres_params + <<: *pg_base pg_replication_slot: spice_item acceleration: - <<: *cayenne_accel + enabled: true + engine: cayenne + mode: file + refresh_mode: changes primary_key: i_id on_conflict: i_id: upsert params: - cayenne_write_concurrency: "4" - cayenne_upload_concurrency: "4" + <<: *accel_medium + cayenne_file_path: ./spice/cayenne/item - - from: postgres:stock - name: stock - params: - <<: *postgres_params - pg_replication_slot: spice_stock - acceleration: - <<: *cayenne_accel - primary_key: (s_w_id, s_i_id) - on_conflict: - (s_w_id, s_i_id): upsert - params: - cayenne_write_concurrency: "4" - cayenne_upload_concurrency: "4" + # ------------------------------------------------------------------ + # Tiny TPC-H tables. + # ------------------------------------------------------------------ - from: postgres:region name: region params: - <<: *postgres_params + <<: *pg_base pg_replication_slot: spice_region acceleration: - <<: *cayenne_accel + enabled: true + engine: cayenne + mode: file + refresh_mode: changes primary_key: r_regionkey on_conflict: r_regionkey: upsert - params: - cayenne_write_concurrency: "4" - cayenne_upload_concurrency: "4" + params: &accel_small + cayenne_metastore: sqlite + cayenne_file_path: ./spice/cayenne/region + cayenne_write_concurrency: '2' + cayenne_upload_concurrency: '2' + cayenne_footer_cache_mb: '8' + cayenne_segment_cache_mb: '16' - from: postgres:nation name: nation params: - <<: *postgres_params + <<: *pg_base pg_replication_slot: spice_nation acceleration: - <<: *cayenne_accel + enabled: true + engine: cayenne + mode: file + refresh_mode: changes primary_key: n_nationkey on_conflict: n_nationkey: upsert params: - cayenne_write_concurrency: "4" - cayenne_upload_concurrency: "4" + <<: *accel_small + cayenne_file_path: ./spice/cayenne/nation - from: postgres:supplier name: supplier params: - <<: *postgres_params + <<: *pg_base pg_replication_slot: spice_supplier acceleration: - <<: *cayenne_accel + enabled: true + engine: cayenne + mode: file + refresh_mode: changes primary_key: su_suppkey on_conflict: su_suppkey: upsert params: - cayenne_write_concurrency: "4" - cayenne_upload_concurrency: "4" + <<: *accel_small + cayenne_file_path: ./spice/cayenne/supplier diff --git a/test/spicepods/chbench/accelerated/postgres-cayenne[file].yaml b/test/spicepods/chbench/accelerated/postgres-cayenne[file].yaml index b0a7b310d3..83330ac75a 100644 --- a/test/spicepods/chbench/accelerated/postgres-cayenne[file].yaml +++ b/test/spicepods/chbench/accelerated/postgres-cayenne[file].yaml @@ -58,11 +58,11 @@ datasets: on_conflict: (no_w_id, no_d_id, no_o_id): upsert - - from: postgres:orders - name: orders + - from: postgres:oorder + name: oorder params: <<: *postgres_params - pg_replication_slot: spice_orders + pg_replication_slot: spice_oorder acceleration: <<: *cayenne_accel primary_key: (o_w_id, o_d_id, o_id) diff --git a/test/spicepods/chbench/accelerated/postgres-duckdb[file].yaml b/test/spicepods/chbench/accelerated/postgres-duckdb[file].yaml index 9736247dd8..f9e5913a44 100644 --- a/test/spicepods/chbench/accelerated/postgres-duckdb[file].yaml +++ b/test/spicepods/chbench/accelerated/postgres-duckdb[file].yaml @@ -58,11 +58,11 @@ datasets: on_conflict: (no_w_id, no_d_id, no_o_id): upsert - - from: postgres:orders - name: orders + - from: postgres:oorder + name: oorder params: <<: *postgres_params - pg_replication_slot: spice_orders + pg_replication_slot: spice_oorder acceleration: <<: *duckdb_accel primary_key: (o_w_id, o_d_id, o_id) diff --git a/test/spicepods/chbench/federated/postgres.yaml b/test/spicepods/chbench/federated/postgres.yaml index f08ce82504..81b309d573 100644 --- a/test/spicepods/chbench/federated/postgres.yaml +++ b/test/spicepods/chbench/federated/postgres.yaml @@ -22,8 +22,8 @@ datasets: - from: postgres:history name: history params: *postgres_params - - from: postgres:orders - name: orders + - from: postgres:oorder + name: oorder params: *postgres_params - from: postgres:new_order name: new_order diff --git a/tools/chbench-driver/src/loader.rs b/tools/chbench-driver/src/loader.rs index 635efd5a6b..e5ebc58095 100644 --- a/tools/chbench-driver/src/loader.rs +++ b/tools/chbench-driver/src/loader.rs @@ -161,7 +161,7 @@ const WAREHOUSE_TABLES: &[(&str, &str, &str)] = &[ "h_c_id, h_c_d_id, h_d_id, h_w_id, h_date, h_amount, h_data", ), ( - "orders", + "oorder", "o_w_id", "o_id, o_d_id, o_c_id, o_entry_d, o_carrier_id, o_ol_cnt, o_all_local", ), @@ -565,7 +565,7 @@ async fn load_orders( d_id: i32, ) -> Result> { let mut sink = BatchSink::new( - "INSERT INTO orders (o_id, o_d_id, o_w_id, o_c_id, o_entry_d, o_carrier_id, o_ol_cnt, o_all_local) VALUES", + "INSERT INTO oorder (o_id, o_d_id, o_w_id, o_c_id, o_entry_d, o_carrier_id, o_ol_cnt, o_all_local) VALUES", ); let mut row = String::new(); @@ -597,9 +597,9 @@ async fn load_orders( sql_opt_i32(o_carrier_id), ); sink.write_row(&row); - sink.maybe_flush(client, "orders").await?; + sink.maybe_flush(client, "oorder").await?; } - sink.flush(client, "orders").await?; + sink.flush(client, "oorder").await?; Ok(ol_cnts) } diff --git a/tools/chbench-driver/src/schema.rs b/tools/chbench-driver/src/schema.rs index 6bdc5fa59f..43835593a2 100644 --- a/tools/chbench-driver/src/schema.rs +++ b/tools/chbench-driver/src/schema.rs @@ -28,7 +28,7 @@ pub const ALL_TABLES: &[&str] = &[ "stock", "customer", "history", - "orders", + "oorder", "new_order", "order_line", "nation", @@ -200,8 +200,8 @@ pub async fn create_tables(client: &Client) -> Result<()> { )", ), ( - "orders", - "CREATE TABLE IF NOT EXISTS orders ( + "oorder", + "CREATE TABLE IF NOT EXISTS oorder ( o_id INT NOT NULL, o_d_id INT NOT NULL, o_w_id INT NOT NULL, @@ -325,7 +325,7 @@ pub async fn create_tables(client: &Client) -> Result<()> { ), ( "idx_order", - "CREATE INDEX IF NOT EXISTS idx_order ON orders (o_w_id, o_d_id, o_c_id, o_id)", + "CREATE INDEX IF NOT EXISTS idx_order ON oorder (o_w_id, o_d_id, o_c_id, o_id)", ), ]; @@ -354,7 +354,7 @@ const MUTATED_TABLES: &[&str] = &[ "customer", "history", "new_order", - "orders", + "oorder", "order_line", "stock", ]; @@ -368,7 +368,7 @@ pub const STALENESS_PROBE_TABLES: &[&str] = &[ "district", "new_order", "order_line", - "orders", + "oorder", "stock", "warehouse", ]; diff --git a/tools/chbench-driver/src/txn/delivery.rs b/tools/chbench-driver/src/txn/delivery.rs index c5154dcf40..c0d1d553ae 100644 --- a/tools/chbench-driver/src/txn/delivery.rs +++ b/tools/chbench-driver/src/txn/delivery.rs @@ -73,24 +73,24 @@ pub async fn run(client: &mut Client, rng: &mut impl Rng, warehouses: i32) -> Re // 3. UPDATE orders with carrier tx.execute( - "UPDATE orders SET o_carrier_id = $1 WHERE o_w_id = $2 AND o_d_id = $3 AND o_id = $4", + "UPDATE oorder SET o_carrier_id = $1 WHERE o_w_id = $2 AND o_d_id = $3 AND o_id = $4", &[&o_carrier_id, &w_id, &d_id, &no_o_id], ) .await .map_err(|source| crate::Error::Sql { - action: "delivery: update orders".into(), + action: "delivery: update oorder".into(), source, })?; // 4. Get customer ID for this order let o_row = tx .query_one( - "SELECT o_c_id FROM orders WHERE o_w_id = $1 AND o_d_id = $2 AND o_id = $3", + "SELECT o_c_id FROM oorder WHERE o_w_id = $1 AND o_d_id = $2 AND o_id = $3", &[&w_id, &d_id, &no_o_id], ) .await .map_err(|source| crate::Error::Sql { - action: "delivery: select orders c_id".into(), + action: "delivery: select oorder c_id".into(), source, })?; diff --git a/tools/chbench-driver/src/txn/new_order.rs b/tools/chbench-driver/src/txn/new_order.rs index 78588ecc5b..11028dbdc3 100644 --- a/tools/chbench-driver/src/txn/new_order.rs +++ b/tools/chbench-driver/src/txn/new_order.rs @@ -116,14 +116,14 @@ pub async fn run( let o_id = d_next_o_id; let now = SystemTime::now(); - // 4. INSERT orders + // 4. INSERT oorder tx.execute( - &stmts.insert_orders, + &stmts.insert_oorder, &[&o_id, &d_id, &w_id, &c_id, &now, &ol_cnt, &all_local], ) .await .map_err(|source| crate::Error::Sql { - action: "new_order: insert orders".into(), + action: "new_order: insert oorder".into(), source, })?; diff --git a/tools/chbench-driver/src/txn/order_status.rs b/tools/chbench-driver/src/txn/order_status.rs index 8180173dae..6b5a555183 100644 --- a/tools/chbench-driver/src/txn/order_status.rs +++ b/tools/chbench-driver/src/txn/order_status.rs @@ -106,7 +106,7 @@ pub async fn run(client: &mut Client, rng: &mut impl Rng, warehouses: i32) -> Re // SELECT latest order let o_row = tx .query_opt( - "SELECT o_id, o_carrier_id, o_entry_d FROM orders WHERE o_w_id = $1 AND o_d_id = $2 AND o_c_id = $3 ORDER BY o_id DESC LIMIT 1", + "SELECT o_id, o_carrier_id, o_entry_d FROM oorder WHERE o_w_id = $1 AND o_d_id = $2 AND o_c_id = $3 ORDER BY o_id DESC LIMIT 1", &[&w_id, &d_id, &c_id], ) .await diff --git a/tools/chbench-driver/src/txn/prepared.rs b/tools/chbench-driver/src/txn/prepared.rs index 20337548d9..a302e04474 100644 --- a/tools/chbench-driver/src/txn/prepared.rs +++ b/tools/chbench-driver/src/txn/prepared.rs @@ -28,7 +28,7 @@ pub struct NewOrderStmts { pub select_customer_warehouse: Statement, pub select_district: Statement, pub update_district: Statement, - pub insert_orders: Statement, + pub insert_oorder: Statement, pub insert_new_order: Statement, pub select_item: Statement, /// One prepared statement per district (`s_dist_01` through `s_dist_10`). @@ -107,14 +107,14 @@ impl PreparedStatements { source, })?; - let insert_orders = client + let insert_oorder = client .prepare( - "INSERT INTO orders (o_id, o_d_id, o_w_id, o_c_id, o_entry_d, o_ol_cnt, o_all_local) \ + "INSERT INTO oorder (o_id, o_d_id, o_w_id, o_c_id, o_entry_d, o_ol_cnt, o_all_local) \ VALUES ($1, $2, $3, $4, $5, $6, $7)", ) .await .map_err(|source| crate::Error::Sql { - action: "prepare new_order: insert_orders".into(), + action: "prepare new_order: insert_oorder".into(), source, })?; @@ -182,7 +182,7 @@ impl PreparedStatements { select_customer_warehouse, select_district, update_district, - insert_orders, + insert_oorder, insert_new_order, select_item, select_stock, diff --git a/tools/testoperator/dispatch/chbench/sf1/postgres-arrow.yaml b/tools/testoperator/dispatch/chbench/sf1/postgres-arrow.yaml index dd534a5622..d18e3b07ca 100644 --- a/tools/testoperator/dispatch/chbench/sf1/postgres-arrow.yaml +++ b/tools/testoperator/dispatch/chbench/sf1/postgres-arrow.yaml @@ -3,5 +3,5 @@ tests: spicepod_path: accelerated/postgres-arrow.yaml runner_type: spiceai-dev-runners scale_factor: 1 - duration: 300 + duration: 600 ready_wait: 60 diff --git a/tools/testoperator/dispatch/chbench/sf1/postgres-cayenne[file]-cdc-tuned.yaml b/tools/testoperator/dispatch/chbench/sf1/postgres-cayenne[file]-cdc-tuned.yaml index c95d2bbc74..9b0be024ff 100644 --- a/tools/testoperator/dispatch/chbench/sf1/postgres-cayenne[file]-cdc-tuned.yaml +++ b/tools/testoperator/dispatch/chbench/sf1/postgres-cayenne[file]-cdc-tuned.yaml @@ -3,5 +3,5 @@ tests: spicepod_path: accelerated/postgres-cayenne[file]-cdc-tuned.yaml runner_type: spiceai-dev-runners scale_factor: 1 - duration: 300 + duration: 600 ready_wait: 60 diff --git a/tools/testoperator/dispatch/chbench/sf1/postgres-cayenne[file].yaml b/tools/testoperator/dispatch/chbench/sf1/postgres-cayenne[file].yaml index 4b9bd14a29..fba9f6ec28 100644 --- a/tools/testoperator/dispatch/chbench/sf1/postgres-cayenne[file].yaml +++ b/tools/testoperator/dispatch/chbench/sf1/postgres-cayenne[file].yaml @@ -3,5 +3,5 @@ tests: spicepod_path: accelerated/postgres-cayenne[file].yaml runner_type: spiceai-dev-runners scale_factor: 1 - duration: 300 + duration: 600 ready_wait: 60 diff --git a/tools/testoperator/dispatch/chbench/sf1/postgres-duckdb[file].yaml b/tools/testoperator/dispatch/chbench/sf1/postgres-duckdb[file].yaml index a3d3857b70..de3d3855b5 100644 --- a/tools/testoperator/dispatch/chbench/sf1/postgres-duckdb[file].yaml +++ b/tools/testoperator/dispatch/chbench/sf1/postgres-duckdb[file].yaml @@ -3,5 +3,5 @@ tests: spicepod_path: accelerated/postgres-duckdb[file].yaml runner_type: spiceai-dev-runners scale_factor: 1 - duration: 300 + duration: 600 ready_wait: 60 diff --git a/tools/testoperator/dispatch/chbench/sf10/postgres-arrow.yaml b/tools/testoperator/dispatch/chbench/sf10/postgres-arrow.yaml new file mode 100644 index 0000000000..345407e5c5 --- /dev/null +++ b/tools/testoperator/dispatch/chbench/sf10/postgres-arrow.yaml @@ -0,0 +1,7 @@ +tests: + htap: + spicepod_path: accelerated/postgres-arrow.yaml + runner_type: spiceai-dev-large-runners + scale_factor: 10 + duration: 600 + ready_wait: 300 diff --git a/tools/testoperator/dispatch/chbench/sf10/postgres-cayenne[file]-cdc-tuned.yaml b/tools/testoperator/dispatch/chbench/sf10/postgres-cayenne[file]-cdc-tuned.yaml new file mode 100644 index 0000000000..5b150a0948 --- /dev/null +++ b/tools/testoperator/dispatch/chbench/sf10/postgres-cayenne[file]-cdc-tuned.yaml @@ -0,0 +1,7 @@ +tests: + htap: + spicepod_path: accelerated/postgres-cayenne[file]-cdc-tuned.yaml + runner_type: spiceai-dev-large-runners + scale_factor: 10 + duration: 600 + ready_wait: 300 diff --git a/tools/testoperator/dispatch/chbench/sf10/postgres-cayenne[file].yaml b/tools/testoperator/dispatch/chbench/sf10/postgres-cayenne[file].yaml new file mode 100644 index 0000000000..4372c8fe94 --- /dev/null +++ b/tools/testoperator/dispatch/chbench/sf10/postgres-cayenne[file].yaml @@ -0,0 +1,7 @@ +tests: + htap: + spicepod_path: accelerated/postgres-cayenne[file].yaml + runner_type: spiceai-dev-large-runners + scale_factor: 10 + duration: 600 + ready_wait: 300 diff --git a/tools/testoperator/dispatch/chbench/sf10/postgres-duckdb[file].yaml b/tools/testoperator/dispatch/chbench/sf10/postgres-duckdb[file].yaml new file mode 100644 index 0000000000..231d17ae3d --- /dev/null +++ b/tools/testoperator/dispatch/chbench/sf10/postgres-duckdb[file].yaml @@ -0,0 +1,7 @@ +tests: + htap: + spicepod_path: accelerated/postgres-duckdb[file].yaml + runner_type: spiceai-dev-large-runners + scale_factor: 10 + duration: 600 + ready_wait: 300 diff --git a/tools/testoperator/dispatch/chbench/sf100/postgres-cayenne[file]-cdc-tuned.yaml b/tools/testoperator/dispatch/chbench/sf100/postgres-cayenne[file]-cdc-tuned.yaml new file mode 100644 index 0000000000..f3f7b4f9a6 --- /dev/null +++ b/tools/testoperator/dispatch/chbench/sf100/postgres-cayenne[file]-cdc-tuned.yaml @@ -0,0 +1,8 @@ +tests: + htap: + spicepod_path: accelerated/postgres-cayenne[file]-cdc-tuned.yaml + runner_type: spiceai-dev-large-runners + scale_factor: 100 + terminals: 100 + duration: 600 + ready_wait: 600 diff --git a/tools/testoperator/dispatch/chbench/sf100/postgres-cayenne[file].yaml b/tools/testoperator/dispatch/chbench/sf100/postgres-cayenne[file].yaml new file mode 100644 index 0000000000..d974974a19 --- /dev/null +++ b/tools/testoperator/dispatch/chbench/sf100/postgres-cayenne[file].yaml @@ -0,0 +1,8 @@ +tests: + htap: + spicepod_path: accelerated/postgres-cayenne[file].yaml + runner_type: spiceai-dev-large-runners + scale_factor: 100 + terminals: 100 + duration: 600 + ready_wait: 600 diff --git a/tools/testoperator/dispatch/chbench/sf100/postgres-duckdb[file].yaml b/tools/testoperator/dispatch/chbench/sf100/postgres-duckdb[file].yaml new file mode 100644 index 0000000000..eb390d3cfa --- /dev/null +++ b/tools/testoperator/dispatch/chbench/sf100/postgres-duckdb[file].yaml @@ -0,0 +1,8 @@ +tests: + htap: + spicepod_path: accelerated/postgres-duckdb[file].yaml + runner_type: spiceai-dev-large-runners + scale_factor: 100 + terminals: 100 + duration: 600 + ready_wait: 600 diff --git a/tools/testoperator/dispatch/chbench/sf1000/postgres-cayenne[file]-cdc-tuned.yaml b/tools/testoperator/dispatch/chbench/sf1000/postgres-cayenne[file]-cdc-tuned.yaml new file mode 100644 index 0000000000..73162a303a --- /dev/null +++ b/tools/testoperator/dispatch/chbench/sf1000/postgres-cayenne[file]-cdc-tuned.yaml @@ -0,0 +1,8 @@ +tests: + htap: + spicepod_path: accelerated/postgres-cayenne[file]-cdc-tuned.yaml + runner_type: spiceai-dev-large-runners + scale_factor: 1000 + terminals: 100 + duration: 600 + ready_wait: 600 diff --git a/tools/testoperator/dispatch/chbench/sf1000/postgres-duckdb[file].yaml b/tools/testoperator/dispatch/chbench/sf1000/postgres-duckdb[file].yaml new file mode 100644 index 0000000000..950f81ddb2 --- /dev/null +++ b/tools/testoperator/dispatch/chbench/sf1000/postgres-duckdb[file].yaml @@ -0,0 +1,8 @@ +tests: + htap: + spicepod_path: accelerated/postgres-duckdb[file].yaml + runner_type: spiceai-dev-large-runners + scale_factor: 1000 + terminals: 100 + duration: 600 + ready_wait: 600 diff --git a/tools/testoperator/src/commands/htap/mod.rs b/tools/testoperator/src/commands/htap/mod.rs index 3ba10b73b5..72276a5e45 100644 --- a/tools/testoperator/src/commands/htap/mod.rs +++ b/tools/testoperator/src/commands/htap/mod.rs @@ -356,6 +356,7 @@ fn emit_replication_metrics(metrics: &crate::spiced_metrics::SpicedMetrics) { worst_lag_ms = l_ms; } } + println!(); // Headline: worst replication lag across all datasets. crate::metrics::REPLICATION_LAG_MS.record(worst_lag_ms, &[]); diff --git a/tools/testoperator/src/commands/htap/staleness.rs b/tools/testoperator/src/commands/htap/staleness.rs index f0002282aa..f5c4fa16bd 100644 --- a/tools/testoperator/src/commands/htap/staleness.rs +++ b/tools/testoperator/src/commands/htap/staleness.rs @@ -58,11 +58,15 @@ impl StalenessReport { /// Print a human-readable data freshness summary and record OTEL metrics. pub fn emit(&self) { println!("\nData Freshness"); + println!( + " {:<14} {:>10} {:>10} {:>10} {:>10}", + "dataset", "p50_ms", "p99_ms", "max_ms", "samples" + ); for table in &self.probe_tables { if let Some(stats) = self.tables.get(table.as_str()) { println!( - " {:<14} P50={:>5}ms P99={:>5}ms max={:>5}ms ({} samples)", - format!("{table}:"), + " {:<14} {:>10} {:>10} {:>10} {:>10}", + table, stats.p50.as_millis(), stats.p99.as_millis(), stats.max.as_millis(), @@ -74,7 +78,6 @@ impl StalenessReport { .record(p99_ms, &[KeyValue::new("dataset", table.clone())]); } } - println!(" ─────────────────"); println!(" worst P99: {}ms", self.worst_p99.as_millis()); #[expect(clippy::cast_precision_loss)] let worst_ms = self.worst_p99.as_millis() as f64; From b254021831dd0413ebfe24c65f727e306ea3c90e Mon Sep 17 00:00:00 2001 From: Sergei Grebnov Date: Tue, 19 May 2026 19:20:44 +0300 Subject: [PATCH 2/5] feat(CH-benCHmark): Use dev-large runners for SF1, increase staleness probe interval to 5s (#10930) --- tools/testoperator/dispatch/chbench/sf1/postgres-arrow.yaml | 2 +- .../chbench/sf1/postgres-cayenne[file]-cdc-tuned.yaml | 2 +- .../dispatch/chbench/sf1/postgres-cayenne[file].yaml | 2 +- .../dispatch/chbench/sf1/postgres-duckdb[file].yaml | 2 +- tools/testoperator/src/commands/htap/staleness.rs | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/testoperator/dispatch/chbench/sf1/postgres-arrow.yaml b/tools/testoperator/dispatch/chbench/sf1/postgres-arrow.yaml index d18e3b07ca..2412040a76 100644 --- a/tools/testoperator/dispatch/chbench/sf1/postgres-arrow.yaml +++ b/tools/testoperator/dispatch/chbench/sf1/postgres-arrow.yaml @@ -1,7 +1,7 @@ tests: htap: spicepod_path: accelerated/postgres-arrow.yaml - runner_type: spiceai-dev-runners + runner_type: spiceai-dev-large-runners scale_factor: 1 duration: 600 ready_wait: 60 diff --git a/tools/testoperator/dispatch/chbench/sf1/postgres-cayenne[file]-cdc-tuned.yaml b/tools/testoperator/dispatch/chbench/sf1/postgres-cayenne[file]-cdc-tuned.yaml index 9b0be024ff..40ba43a5ab 100644 --- a/tools/testoperator/dispatch/chbench/sf1/postgres-cayenne[file]-cdc-tuned.yaml +++ b/tools/testoperator/dispatch/chbench/sf1/postgres-cayenne[file]-cdc-tuned.yaml @@ -1,7 +1,7 @@ tests: htap: spicepod_path: accelerated/postgres-cayenne[file]-cdc-tuned.yaml - runner_type: spiceai-dev-runners + runner_type: spiceai-dev-large-runners scale_factor: 1 duration: 600 ready_wait: 60 diff --git a/tools/testoperator/dispatch/chbench/sf1/postgres-cayenne[file].yaml b/tools/testoperator/dispatch/chbench/sf1/postgres-cayenne[file].yaml index fba9f6ec28..ba2f959399 100644 --- a/tools/testoperator/dispatch/chbench/sf1/postgres-cayenne[file].yaml +++ b/tools/testoperator/dispatch/chbench/sf1/postgres-cayenne[file].yaml @@ -1,7 +1,7 @@ tests: htap: spicepod_path: accelerated/postgres-cayenne[file].yaml - runner_type: spiceai-dev-runners + runner_type: spiceai-dev-large-runners scale_factor: 1 duration: 600 ready_wait: 60 diff --git a/tools/testoperator/dispatch/chbench/sf1/postgres-duckdb[file].yaml b/tools/testoperator/dispatch/chbench/sf1/postgres-duckdb[file].yaml index de3d3855b5..3c32a29664 100644 --- a/tools/testoperator/dispatch/chbench/sf1/postgres-duckdb[file].yaml +++ b/tools/testoperator/dispatch/chbench/sf1/postgres-duckdb[file].yaml @@ -1,7 +1,7 @@ tests: htap: spicepod_path: accelerated/postgres-duckdb[file].yaml - runner_type: spiceai-dev-runners + runner_type: spiceai-dev-large-runners scale_factor: 1 duration: 600 ready_wait: 60 diff --git a/tools/testoperator/src/commands/htap/staleness.rs b/tools/testoperator/src/commands/htap/staleness.rs index f5c4fa16bd..04d7f851b0 100644 --- a/tools/testoperator/src/commands/htap/staleness.rs +++ b/tools/testoperator/src/commands/htap/staleness.rs @@ -16,7 +16,7 @@ limitations under the License. //! Staleness gap measurement for HTAP benchmarks. //! -//! Probes TPC-C tables every 1s by comparing `MAX(_bench_ts)` between the +//! Probes TPC-C tables every 5s by comparing `MAX(_bench_ts)` between the //! source and the Spice accelerated copy. The gap is the replication //! staleness — how far behind Spice is from the source at any given moment. //! @@ -102,7 +102,7 @@ async fn run_staleness_probe( spice_client: spiceai::Client, cancel: CancellationToken, ) -> anyhow::Result { - let poll_interval = Duration::from_secs(1); + let poll_interval = Duration::from_secs(5); let probe_tables = driver.probe_tables(); // Per-table gap samples (microseconds). From 5a60299f7995dfecce52a1823773655ececc062a Mon Sep 17 00:00:00 2001 From: Sergei Grebnov Date: Tue, 19 May 2026 23:01:04 +0300 Subject: [PATCH 3/5] Benchmarks: add publish.workspace = true to chbench-driver Cargo.toml (#10935) --- tools/chbench-driver/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/chbench-driver/Cargo.toml b/tools/chbench-driver/Cargo.toml index 370fd4c166..238ae9a583 100644 --- a/tools/chbench-driver/Cargo.toml +++ b/tools/chbench-driver/Cargo.toml @@ -4,6 +4,7 @@ version.workspace = true edition.workspace = true rust-version.workspace = true license-file.workspace = true +publish.workspace = true [dependencies] async-trait = { workspace = true } From 0f67b35abe8289d9d368e024b6e2094cb36520a9 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 19 May 2026 13:32:55 -0700 Subject: [PATCH 4/5] fix: Update tpch benchmark snapshots for accelerated/s3[parquet]-cayenne[file].yaml (#10928) Co-authored-by: Spice Benchmark Snapshot Update Bot Co-authored-by: Sergei Grebnov --- ...-cayenne[file]_tpch_q11_explain_sf100.snap | 88 +++---- ...-cayenne[file]_tpch_q17_explain_sf100.snap | 78 ++---- ...-cayenne[file]_tpch_q19_explain_sf100.snap | 31 +-- ...-cayenne[file]_tpch_q20_explain_sf100.snap | 179 +++++-------- ...-cayenne[file]_tpch_q21_explain_sf100.snap | 174 ++++++------- ...]-cayenne[file]_tpch_q2_explain_sf100.snap | 236 ++++++++---------- ...]-cayenne[file]_tpch_q3_explain_sf100.snap | 27 +- ...]-cayenne[file]_tpch_q5_explain_sf100.snap | 158 ++++++------ ...]-cayenne[file]_tpch_q7_explain_sf100.snap | 176 ++++++------- ...]-cayenne[file]_tpch_q8_explain_sf100.snap | 223 ++++++++--------- ...]-cayenne[file]_tpch_q9_explain_sf100.snap | 163 ++++++------ ...[parquet]-cayenne[file]_tpch_q1_sf100.snap | 16 +- 12 files changed, 647 insertions(+), 902 deletions(-) diff --git a/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q11_explain_sf100.snap b/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q11_explain_sf100.snap index 9153c25803..4b2c246a87 100644 --- a/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q11_explain_sf100.snap +++ b/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q11_explain_sf100.snap @@ -11,16 +11,10 @@ description: "Query: tpch_q11" | | Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[sum(partsupp.ps_supplycost * CAST(partsupp.ps_availqty AS Decimal128(10, 0)))]] | | | Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost | | | Inner Join: supplier.s_nationkey = nation.n_nationkey | -| | LeftSemi Join: supplier.s_nationkey = __correlated_sq_4.n_nationkey | -| | Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey | -| | Inner Join: partsupp.ps_suppkey = supplier.s_suppkey | -| | TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost], partial_filters=[Boolean(true)] | -| | TableScan: supplier projection=[s_suppkey, s_nationkey] | -| | SubqueryAlias: __correlated_sq_4 | -| | SubqueryAlias: __cayenne_xclos___2 | -| | Projection: nation.n_nationkey | -| | Filter: nation.n_name = LargeUtf8("GERMANY") | -| | TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = LargeUtf8("GERMANY")] | +| | Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey | +| | Inner Join: partsupp.ps_suppkey = supplier.s_suppkey | +| | TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost], partial_filters=[Boolean(true)] | +| | TableScan: supplier projection=[s_suppkey, s_nationkey] | | | Projection: nation.n_nationkey | | | Filter: nation.n_name = LargeUtf8("GERMANY") | | | TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = LargeUtf8("GERMANY")] | @@ -29,16 +23,10 @@ description: "Query: tpch_q11" | | Aggregate: groupBy=[[]], aggr=[[sum(partsupp.ps_supplycost * CAST(partsupp.ps_availqty AS Decimal128(10, 0)))]] | | | Projection: partsupp.ps_availqty, partsupp.ps_supplycost | | | Inner Join: supplier.s_nationkey = nation.n_nationkey | -| | LeftSemi Join: supplier.s_nationkey = __correlated_sq_5.n_nationkey | -| | Projection: partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey | -| | Inner Join: partsupp.ps_suppkey = supplier.s_suppkey | -| | TableScan: partsupp projection=[ps_suppkey, ps_availqty, ps_supplycost] | -| | TableScan: supplier projection=[s_suppkey, s_nationkey] | -| | SubqueryAlias: __correlated_sq_5 | -| | SubqueryAlias: __cayenne_xclos___3 | -| | Projection: nation.n_nationkey | -| | Filter: nation.n_name = LargeUtf8("GERMANY") | -| | TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = LargeUtf8("GERMANY")] | +| | Projection: partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey | +| | Inner Join: partsupp.ps_suppkey = supplier.s_suppkey | +| | TableScan: partsupp projection=[ps_suppkey, ps_availqty, ps_supplycost] | +| | TableScan: supplier projection=[s_suppkey, s_nationkey] | | | Projection: nation.n_nationkey | | | Filter: nation.n_name = LargeUtf8("GERMANY") | | | TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = LargeUtf8("GERMANY")] | @@ -56,24 +44,18 @@ description: "Query: tpch_q11" | | CayenneAccelerationExec | | | BytesProcessedExec | | | DataSourceExec: file_groups={1 group: []}, projection=[n_nationkey, n_name], file_type=vortex, predicate: n_name@1 = GERMANY | -| | HashJoinExec: mode=CollectLeft, join_type=RightSemi, accumulator=MinMaxLeftAccumulator, on=[(n_nationkey@0, s_nationkey@2)] | -| | ProjectionExec: expr=[n_nationkey@0 as n_nationkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={1 group: []}, projection=[n_nationkey, n_name], file_type=vortex, predicate: n_name@1 = GERMANY | -| | ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] | -| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_nationkey@1, ps_availqty@3, ps_supplycost@4] | -| | RepartitionExec: partitioning=Hash([s_suppkey@0], 16), input_partitions=16 | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[s_suppkey, s_nationkey], file_type=vortex | -| | RepartitionExec: partitioning=Hash([ps_suppkey@0], 16), input_partitions=16 | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[ps_suppkey, ps_availqty, ps_supplycost], file_type=vortex, predicate: DynamicFilter [ empty ] | +| | ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] | +| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_nationkey@1, ps_availqty@3, ps_supplycost@4] | +| | RepartitionExec: partitioning=Hash([s_suppkey@0], 16), input_partitions=16 | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[s_suppkey, s_nationkey], file_type=vortex, predicate: DynamicFilter [ empty ] | +| | RepartitionExec: partitioning=Hash([ps_suppkey@0], 16), input_partitions=16 | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[ps_suppkey, ps_availqty, ps_supplycost], file_type=vortex, predicate: DynamicFilter [ empty ] | | | ProjectionExec: expr=[ps_partkey@0 as ps_partkey, sum(partsupp.ps_supplycost * partsupp.ps_availqty)@1 as sum(partsupp.ps_supplycost * partsupp.ps_availqty), CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@1 AS Decimal128(38, 15)) as join_proj_push_down_1] | | | AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] | | | RepartitionExec: partitioning=Hash([ps_partkey@0], 16), input_partitions=16 | @@ -84,23 +66,17 @@ description: "Query: tpch_q11" | | CayenneAccelerationExec | | | BytesProcessedExec | | | DataSourceExec: file_groups={1 group: []}, projection=[n_nationkey, n_name], file_type=vortex, predicate: n_name@1 = GERMANY | -| | HashJoinExec: mode=CollectLeft, join_type=RightSemi, accumulator=MinMaxLeftAccumulator, on=[(n_nationkey@0, s_nationkey@3)] | -| | ProjectionExec: expr=[n_nationkey@0 as n_nationkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={1 group: []}, projection=[n_nationkey, n_name], file_type=vortex, predicate: n_name@1 = GERMANY | -| | ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@0 as s_nationkey] | -| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_availqty@4, ps_supplycost@5] | -| | RepartitionExec: partitioning=Hash([s_suppkey@0], 16), input_partitions=16 | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[s_suppkey, s_nationkey], file_type=vortex | -| | RepartitionExec: partitioning=Hash([ps_suppkey@1], 16), input_partitions=16 | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost], file_type=vortex, predicate: DynamicFilter [ empty ] | +| | ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@0 as s_nationkey] | +| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_availqty@4, ps_supplycost@5] | +| | RepartitionExec: partitioning=Hash([s_suppkey@0], 16), input_partitions=16 | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[s_suppkey, s_nationkey], file_type=vortex, predicate: DynamicFilter [ empty ] | +| | RepartitionExec: partitioning=Hash([ps_suppkey@1], 16), input_partitions=16 | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost], file_type=vortex, predicate: DynamicFilter [ empty ] | | | | +---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q17_explain_sf100.snap b/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q17_explain_sf100.snap index 272f5629c5..73741c88bc 100644 --- a/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q17_explain_sf100.snap +++ b/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q17_explain_sf100.snap @@ -11,35 +11,14 @@ description: "Query: tpch_q17" | | Inner Join: part.p_partkey = __scalar_sq_1.l_partkey Filter: CAST(lineitem.l_quantity AS Decimal128(30, 15)) < __scalar_sq_1.Float64(0.2) * avg(lineitem.l_quantity) | | | Projection: lineitem.l_quantity, lineitem.l_extendedprice, part.p_partkey | | | Inner Join: lineitem.l_partkey = part.p_partkey | -| | LeftSemi Join: lineitem.l_partkey = __correlated_sq_6.p_partkey | -| | TableScan: lineitem projection=[l_partkey, l_quantity, l_extendedprice] | -| | SubqueryAlias: __correlated_sq_6 | -| | SubqueryAlias: __cayenne_xclos___4 | -| | Projection: part.p_partkey | -| | Filter: part.p_brand = LargeUtf8("Brand#23") AND part.p_container = LargeUtf8("MED BOX") | -| | TableScan: part projection=[p_partkey, p_brand, p_container], partial_filters=[part.p_brand = LargeUtf8("Brand#23"), part.p_container = LargeUtf8("MED BOX")] | +| | TableScan: lineitem projection=[l_partkey, l_quantity, l_extendedprice] | | | Projection: part.p_partkey | | | Filter: part.p_brand = LargeUtf8("Brand#23") AND part.p_container = LargeUtf8("MED BOX") | | | TableScan: part projection=[p_partkey, p_brand, p_container], partial_filters=[part.p_brand = LargeUtf8("Brand#23"), part.p_container = LargeUtf8("MED BOX")] | -| | LeftSemi Join: __scalar_sq_1.l_partkey = __correlated_sq_3.p_partkey | -| | SubqueryAlias: __scalar_sq_1 | -| | Projection: CAST(Float64(0.2) * avg(lineitem.l_quantity) AS Decimal128(30, 15)), lineitem.l_partkey | -| | Aggregate: groupBy=[[lineitem.l_partkey]], aggr=[[avg(CAST(lineitem.l_quantity AS Float64))]] | -| | TableScan: lineitem projection=[l_partkey, l_quantity] | -| | SubqueryAlias: __correlated_sq_3 | -| | SubqueryAlias: __cayenne_xclos___2 | -| | Projection: part.p_partkey | -| | Inner Join: lineitem.l_partkey = part.p_partkey | -| | LeftSemi Join: lineitem.l_partkey = __correlated_sq_7.p_partkey | -| | TableScan: lineitem projection=[l_partkey] | -| | SubqueryAlias: __correlated_sq_7 | -| | SubqueryAlias: __cayenne_xclos___5 | -| | Projection: part.p_partkey | -| | Filter: part.p_brand = LargeUtf8("Brand#23") AND part.p_container = LargeUtf8("MED BOX") | -| | TableScan: part projection=[p_partkey, p_brand, p_container], partial_filters=[part.p_brand = LargeUtf8("Brand#23"), part.p_container = LargeUtf8("MED BOX")] | -| | Projection: part.p_partkey | -| | Filter: part.p_brand = LargeUtf8("Brand#23") AND part.p_container = LargeUtf8("MED BOX") | -| | TableScan: part projection=[p_partkey, p_brand, p_container], partial_filters=[part.p_brand = LargeUtf8("Brand#23"), part.p_container = LargeUtf8("MED BOX")] | +| | SubqueryAlias: __scalar_sq_1 | +| | Projection: CAST(Float64(0.2) * avg(lineitem.l_quantity) AS Decimal128(30, 15)), lineitem.l_partkey | +| | Aggregate: groupBy=[[lineitem.l_partkey]], aggr=[[avg(CAST(lineitem.l_quantity AS Float64))]] | +| | TableScan: lineitem projection=[l_partkey, l_quantity] | | physical_plan | ProjectionExec: expr=[CAST(sum(lineitem.l_extendedprice)@0 AS Float64) / 7 as avg_yearly] | | | AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice)] | | | CoalescePartitionsExec | @@ -53,45 +32,18 @@ description: "Query: tpch_q17" | | CayenneAccelerationExec | | | BytesProcessedExec | | | DataSourceExec: file_groups={16 groups: []}, projection=[p_partkey, p_brand, p_container], file_type=vortex, predicate: p_brand@3 = Brand#23 AND p_container@6 = MED BOX | -| | HashJoinExec: mode=Partitioned, join_type=RightSemi, accumulator=MinMaxLeftAccumulator, on=[(p_partkey@0, l_partkey@0)] | -| | RepartitionExec: partitioning=Hash([p_partkey@0], 16), input_partitions=16 | -| | ProjectionExec: expr=[p_partkey@0 as p_partkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[p_partkey, p_brand, p_container], file_type=vortex, predicate: p_brand@3 = Brand#23 AND p_container@6 = MED BOX | -| | RepartitionExec: partitioning=Hash([l_partkey@0], 16), input_partitions=16 | +| | RepartitionExec: partitioning=Hash([l_partkey@0], 16), input_partitions=16 | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[l_partkey, l_quantity, l_extendedprice], file_type=vortex, predicate: DynamicFilter [ empty ] | +| | ProjectionExec: expr=[CAST(0.2 * avg(lineitem.l_quantity)@1 AS Decimal128(30, 15)) as Float64(0.2) * avg(lineitem.l_quantity), l_partkey@0 as l_partkey] | +| | AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] | +| | RepartitionExec: partitioning=Hash([l_partkey@0], 16), input_partitions=16 | +| | AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] | | | SchemaCastScanExec | | | CayenneAccelerationExec | | | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[l_partkey, l_quantity, l_extendedprice], file_type=vortex | -| | HashJoinExec: mode=Partitioned, join_type=RightSemi, accumulator=MinMaxLeftAccumulator, on=[(p_partkey@0, l_partkey@1)] | -| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(p_partkey@0, l_partkey@0)], projection=[p_partkey@0] | -| | RepartitionExec: partitioning=Hash([p_partkey@0], 16), input_partitions=16 | -| | ProjectionExec: expr=[p_partkey@0 as p_partkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[p_partkey, p_brand, p_container], file_type=vortex, predicate: p_brand@3 = Brand#23 AND p_container@6 = MED BOX | -| | HashJoinExec: mode=Partitioned, join_type=RightSemi, accumulator=MinMaxLeftAccumulator, on=[(p_partkey@0, l_partkey@0)] | -| | RepartitionExec: partitioning=Hash([p_partkey@0], 16), input_partitions=16 | -| | ProjectionExec: expr=[p_partkey@0 as p_partkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[p_partkey, p_brand, p_container], file_type=vortex, predicate: p_brand@3 = Brand#23 AND p_container@6 = MED BOX | -| | RepartitionExec: partitioning=Hash([l_partkey@0], 16), input_partitions=16 | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[l_partkey], file_type=vortex | -| | ProjectionExec: expr=[CAST(0.2 * avg(lineitem.l_quantity)@1 AS Decimal128(30, 15)) as Float64(0.2) * avg(lineitem.l_quantity), l_partkey@0 as l_partkey] | -| | AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] | -| | RepartitionExec: partitioning=Hash([l_partkey@0], 16), input_partitions=16 | -| | AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[l_partkey, l_quantity], file_type=vortex | +| | DataSourceExec: file_groups={16 groups: []}, projection=[l_partkey, l_quantity], file_type=vortex, predicate: DynamicFilter [ empty ] | | | | +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q19_explain_sf100.snap b/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q19_explain_sf100.snap index db6b787e36..446c68699c 100644 --- a/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q19_explain_sf100.snap +++ b/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q19_explain_sf100.snap @@ -9,15 +9,9 @@ description: "Query: tpch_q19" | | Aggregate: groupBy=[[]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] | | | Projection: lineitem.l_extendedprice, lineitem.l_discount | | | Inner Join: lineitem.l_partkey = part.p_partkey Filter: part.p_brand = LargeUtf8("Brand#12") AND part.p_container IN ([LargeUtf8("SM CASE"), LargeUtf8("SM BOX"), LargeUtf8("SM PACK"), LargeUtf8("SM PKG")]) AND lineitem.l_quantity >= Decimal128(Some(100),15,2) AND lineitem.l_quantity <= Decimal128(Some(1100),15,2) AND part.p_size <= Int32(5) OR part.p_brand = LargeUtf8("Brand#23") AND part.p_container IN ([LargeUtf8("MED BAG"), LargeUtf8("MED BOX"), LargeUtf8("MED PKG"), LargeUtf8("MED PACK")]) AND lineitem.l_quantity >= Decimal128(Some(1000),15,2) AND lineitem.l_quantity <= Decimal128(Some(2000),15,2) AND part.p_size <= Int32(10) OR part.p_brand = LargeUtf8("Brand#34") AND part.p_container IN ([LargeUtf8("LG CASE"), LargeUtf8("LG BOX"), LargeUtf8("LG PACK"), LargeUtf8("LG PKG")]) AND lineitem.l_quantity >= Decimal128(Some(2000),15,2) AND lineitem.l_quantity <= Decimal128(Some(3000),15,2) AND part.p_size <= Int32(15) | -| | LeftSemi Join: lineitem.l_partkey = __correlated_sq_2.p_partkey | -| | Projection: lineitem.l_partkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount | -| | Filter: (lineitem.l_quantity >= Decimal128(Some(100),15,2) AND lineitem.l_quantity <= Decimal128(Some(1100),15,2) OR lineitem.l_quantity >= Decimal128(Some(1000),15,2) AND lineitem.l_quantity <= Decimal128(Some(2000),15,2) OR lineitem.l_quantity >= Decimal128(Some(2000),15,2) AND lineitem.l_quantity <= Decimal128(Some(3000),15,2)) AND (lineitem.l_shipmode = LargeUtf8("AIR") OR lineitem.l_shipmode = LargeUtf8("AIR REG")) AND lineitem.l_shipinstruct = LargeUtf8("DELIVER IN PERSON") | -| | TableScan: lineitem projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], partial_filters=[lineitem.l_shipmode = LargeUtf8("AIR") OR lineitem.l_shipmode = LargeUtf8("AIR REG"), lineitem.l_shipinstruct = LargeUtf8("DELIVER IN PERSON"), lineitem.l_quantity >= Decimal128(Some(100),15,2) AND lineitem.l_quantity <= Decimal128(Some(1100),15,2) OR lineitem.l_quantity >= Decimal128(Some(1000),15,2) AND lineitem.l_quantity <= Decimal128(Some(2000),15,2) OR lineitem.l_quantity >= Decimal128(Some(2000),15,2) AND lineitem.l_quantity <= Decimal128(Some(3000),15,2)] | -| | SubqueryAlias: __correlated_sq_2 | -| | SubqueryAlias: __cayenne_xclos___1 | -| | Projection: part.p_partkey | -| | Filter: part.p_size >= Int32(1) AND (part.p_brand = LargeUtf8("Brand#12") AND part.p_container IN ([LargeUtf8("SM CASE"), LargeUtf8("SM BOX"), LargeUtf8("SM PACK"), LargeUtf8("SM PKG")]) AND part.p_size <= Int32(5) OR part.p_brand = LargeUtf8("Brand#23") AND part.p_container IN ([LargeUtf8("MED BAG"), LargeUtf8("MED BOX"), LargeUtf8("MED PKG"), LargeUtf8("MED PACK")]) AND part.p_size <= Int32(10) OR part.p_brand = LargeUtf8("Brand#34") AND part.p_container IN ([LargeUtf8("LG CASE"), LargeUtf8("LG BOX"), LargeUtf8("LG PACK"), LargeUtf8("LG PKG")]) AND part.p_size <= Int32(15)) | -| | TableScan: part projection=[p_partkey, p_brand, p_size, p_container], partial_filters=[part.p_size >= Int32(1), part.p_brand = LargeUtf8("Brand#12") AND part.p_container IN ([LargeUtf8("SM CASE"), LargeUtf8("SM BOX"), LargeUtf8("SM PACK"), LargeUtf8("SM PKG")]) AND part.p_size <= Int32(5) OR part.p_brand = LargeUtf8("Brand#23") AND part.p_container IN ([LargeUtf8("MED BAG"), LargeUtf8("MED BOX"), LargeUtf8("MED PKG"), LargeUtf8("MED PACK")]) AND part.p_size <= Int32(10) OR part.p_brand = LargeUtf8("Brand#34") AND part.p_container IN ([LargeUtf8("LG CASE"), LargeUtf8("LG BOX"), LargeUtf8("LG PACK"), LargeUtf8("LG PKG")]) AND part.p_size <= Int32(15)] | +| | Projection: lineitem.l_partkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount | +| | Filter: (lineitem.l_quantity >= Decimal128(Some(100),15,2) AND lineitem.l_quantity <= Decimal128(Some(1100),15,2) OR lineitem.l_quantity >= Decimal128(Some(1000),15,2) AND lineitem.l_quantity <= Decimal128(Some(2000),15,2) OR lineitem.l_quantity >= Decimal128(Some(2000),15,2) AND lineitem.l_quantity <= Decimal128(Some(3000),15,2)) AND (lineitem.l_shipmode = LargeUtf8("AIR") OR lineitem.l_shipmode = LargeUtf8("AIR REG")) AND lineitem.l_shipinstruct = LargeUtf8("DELIVER IN PERSON") | +| | TableScan: lineitem projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], partial_filters=[lineitem.l_shipmode = LargeUtf8("AIR") OR lineitem.l_shipmode = LargeUtf8("AIR REG"), lineitem.l_shipinstruct = LargeUtf8("DELIVER IN PERSON"), lineitem.l_quantity >= Decimal128(Some(100),15,2) AND lineitem.l_quantity <= Decimal128(Some(1100),15,2) OR lineitem.l_quantity >= Decimal128(Some(1000),15,2) AND lineitem.l_quantity <= Decimal128(Some(2000),15,2) OR lineitem.l_quantity >= Decimal128(Some(2000),15,2) AND lineitem.l_quantity <= Decimal128(Some(3000),15,2)] | | | Filter: (part.p_brand = LargeUtf8("Brand#12") AND part.p_container IN ([LargeUtf8("SM CASE"), LargeUtf8("SM BOX"), LargeUtf8("SM PACK"), LargeUtf8("SM PKG")]) AND part.p_size <= Int32(5) OR part.p_brand = LargeUtf8("Brand#23") AND part.p_container IN ([LargeUtf8("MED BAG"), LargeUtf8("MED BOX"), LargeUtf8("MED PKG"), LargeUtf8("MED PACK")]) AND part.p_size <= Int32(10) OR part.p_brand = LargeUtf8("Brand#34") AND part.p_container IN ([LargeUtf8("LG CASE"), LargeUtf8("LG BOX"), LargeUtf8("LG PACK"), LargeUtf8("LG PKG")]) AND part.p_size <= Int32(15)) AND part.p_size >= Int32(1) | | | TableScan: part projection=[p_partkey, p_brand, p_size, p_container], partial_filters=[part.p_size >= Int32(1), part.p_brand = LargeUtf8("Brand#12") AND part.p_container IN ([LargeUtf8("SM CASE"), LargeUtf8("SM BOX"), LargeUtf8("SM PACK"), LargeUtf8("SM PKG")]) AND part.p_size <= Int32(5) OR part.p_brand = LargeUtf8("Brand#23") AND part.p_container IN ([LargeUtf8("MED BAG"), LargeUtf8("MED BOX"), LargeUtf8("MED PKG"), LargeUtf8("MED PACK")]) AND part.p_size <= Int32(10) OR part.p_brand = LargeUtf8("Brand#34") AND part.p_container IN ([LargeUtf8("LG CASE"), LargeUtf8("LG BOX"), LargeUtf8("LG PACK"), LargeUtf8("LG PKG")]) AND part.p_size <= Int32(15)] | | physical_plan | ProjectionExec: expr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@0 as revenue] | @@ -30,18 +24,11 @@ description: "Query: tpch_q19" | | CayenneAccelerationExec | | | BytesProcessedExec | | | DataSourceExec: file_groups={16 groups: []}, projection=[p_partkey, p_brand, p_size, p_container], file_type=vortex, predicate: (p_brand@3 = Brand#12 AND p_container@6 IN (SET) ([SM CASE, SM BOX, SM PACK, SM PKG]) AND p_size@5 <= 5 OR p_brand@3 = Brand#23 AND p_container@6 IN (SET) ([MED BAG, MED BOX, MED PKG, MED PACK]) AND p_size@5 <= 10 OR p_brand@3 = Brand#34 AND p_container@6 IN (SET) ([LG CASE, LG BOX, LG PACK, LG PKG]) AND p_size@5 <= 15) AND p_size@5 >= 1 | -| | HashJoinExec: mode=Partitioned, join_type=RightSemi, accumulator=MinMaxLeftAccumulator, on=[(p_partkey@0, l_partkey@0)] | -| | RepartitionExec: partitioning=Hash([p_partkey@0], 16), input_partitions=16 | -| | ProjectionExec: expr=[p_partkey@0 as p_partkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[p_partkey, p_brand, p_size, p_container], file_type=vortex, predicate: p_size@5 >= 1 AND (p_brand@3 = Brand#12 AND p_container@6 IN (SET) ([SM CASE, SM BOX, SM PACK, SM PKG]) AND p_size@5 <= 5 OR p_brand@3 = Brand#23 AND p_container@6 IN (SET) ([MED BAG, MED BOX, MED PKG, MED PACK]) AND p_size@5 <= 10 OR p_brand@3 = Brand#34 AND p_container@6 IN (SET) ([LG CASE, LG BOX, LG PACK, LG PKG]) AND p_size@5 <= 15) | -| | RepartitionExec: partitioning=Hash([l_partkey@0], 16), input_partitions=16 | -| | ProjectionExec: expr=[l_partkey@0 as l_partkey, l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], file_type=vortex, predicate: (l_quantity@4 >= Some(100),15,2 AND l_quantity@4 <= Some(1100),15,2 OR l_quantity@4 >= Some(1000),15,2 AND l_quantity@4 <= Some(2000),15,2 OR l_quantity@4 >= Some(2000),15,2 AND l_quantity@4 <= Some(3000),15,2) AND (l_shipmode@14 = AIR OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON | +| | RepartitionExec: partitioning=Hash([l_partkey@0], 16), input_partitions=16 | +| | ProjectionExec: expr=[l_partkey@0 as l_partkey, l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount] | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], file_type=vortex, predicate: (l_quantity@4 >= Some(100),15,2 AND l_quantity@4 <= Some(1100),15,2 OR l_quantity@4 >= Some(1000),15,2 AND l_quantity@4 <= Some(2000),15,2 OR l_quantity@4 >= Some(2000),15,2 AND l_quantity@4 <= Some(3000),15,2) AND (l_shipmode@14 = AIR OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON AND DynamicFilter [ empty ] | | | | +---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q20_explain_sf100.snap b/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q20_explain_sf100.snap index 099967317b..12a4c8e903 100644 --- a/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q20_explain_sf100.snap +++ b/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q20_explain_sf100.snap @@ -2,115 +2,70 @@ source: crates/test-framework/src/snapshot/mod.rs description: "Query: tpch_q20" --- -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: supplier.s_name ASC NULLS LAST | -| | Projection: supplier.s_name, supplier.s_address | -| | LeftSemi Join: supplier.s_suppkey = __correlated_sq_2.ps_suppkey | -| | Projection: supplier.s_suppkey, supplier.s_name, supplier.s_address | -| | Inner Join: supplier.s_nationkey = nation.n_nationkey | -| | LeftSemi Join: supplier.s_nationkey = __correlated_sq_8.n_nationkey | -| | TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey] | -| | SubqueryAlias: __correlated_sq_8 | -| | SubqueryAlias: __cayenne_xclos___6 | -| | Projection: nation.n_nationkey | -| | Filter: nation.n_name = LargeUtf8("CANADA") | -| | TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = LargeUtf8("CANADA")] | -| | Projection: nation.n_nationkey | -| | Filter: nation.n_name = LargeUtf8("CANADA") | -| | TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = LargeUtf8("CANADA")] | -| | LeftSemi Join: __correlated_sq_2.ps_suppkey = __correlated_sq_5.s_suppkey | -| | SubqueryAlias: __correlated_sq_2 | -| | Projection: partsupp.ps_suppkey | -| | Inner Join: partsupp.ps_partkey = __scalar_sq_3.l_partkey, partsupp.ps_suppkey = __scalar_sq_3.l_suppkey Filter: CAST(partsupp.ps_availqty AS Float64) > __scalar_sq_3.Float64(0.5) * sum(lineitem.l_quantity) | -| | LeftSemi Join: partsupp.ps_partkey = __correlated_sq_1.p_partkey | -| | TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty] | -| | SubqueryAlias: __correlated_sq_1 | -| | Projection: part.p_partkey | -| | Filter: part.p_name LIKE LargeUtf8("forest%") | -| | TableScan: part projection=[p_partkey, p_name], partial_filters=[part.p_name LIKE LargeUtf8("forest%")] | -| | SubqueryAlias: __scalar_sq_3 | -| | Projection: Float64(0.5) * CAST(sum(lineitem.l_quantity) AS Float64), lineitem.l_partkey, lineitem.l_suppkey | -| | Aggregate: groupBy=[[lineitem.l_partkey, lineitem.l_suppkey]], aggr=[[sum(lineitem.l_quantity)]] | -| | Projection: lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_quantity | -| | Filter: lineitem.l_shipdate >= Date32("1994-01-01") AND lineitem.l_shipdate < Date32("1995-01-01") | -| | TableScan: lineitem projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("1994-01-01"), lineitem.l_shipdate < Date32("1995-01-01")] | -| | SubqueryAlias: __correlated_sq_5 | -| | SubqueryAlias: __cayenne_xclos___4 | -| | Projection: supplier.s_suppkey | -| | Inner Join: supplier.s_nationkey = nation.n_nationkey | -| | LeftSemi Join: supplier.s_nationkey = __correlated_sq_9.n_nationkey | -| | TableScan: supplier projection=[s_suppkey, s_nationkey] | -| | SubqueryAlias: __correlated_sq_9 | -| | SubqueryAlias: __cayenne_xclos___7 | -| | Projection: nation.n_nationkey | -| | Filter: nation.n_name = LargeUtf8("CANADA") | -| | TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = LargeUtf8("CANADA")] | -| | Projection: nation.n_nationkey | -| | Filter: nation.n_name = LargeUtf8("CANADA") | -| | TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = LargeUtf8("CANADA")] | -| physical_plan | SortPreservingMergeExec: [s_name@0 ASC NULLS LAST] | -| | SortExec: expr=[s_name@0 ASC NULLS LAST], preserve_partitioning=[true] | -| | HashJoinExec: mode=Partitioned, join_type=LeftSemi, accumulator=MinMaxLeftAccumulator, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_name@1, s_address@2] | -| | RepartitionExec: partitioning=Hash([s_suppkey@0], 16), input_partitions=16 | -| | HashJoinExec: mode=CollectLeft, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(n_nationkey@0, s_nationkey@3)], projection=[s_suppkey@1, s_name@2, s_address@3] | -| | ProjectionExec: expr=[n_nationkey@0 as n_nationkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={1 group: []}, projection=[n_nationkey, n_name], file_type=vortex, predicate: n_name@1 = CANADA | -| | HashJoinExec: mode=CollectLeft, join_type=RightSemi, accumulator=MinMaxLeftAccumulator, on=[(n_nationkey@0, s_nationkey@3)] | -| | ProjectionExec: expr=[n_nationkey@0 as n_nationkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={1 group: []}, projection=[n_nationkey, n_name], file_type=vortex, predicate: n_name@1 = CANADA | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[s_suppkey, s_name, s_address, s_nationkey], file_type=vortex | -| | HashJoinExec: mode=Partitioned, join_type=RightSemi, accumulator=MinMaxLeftAccumulator, on=[(s_suppkey@0, ps_suppkey@0)] | -| | RepartitionExec: partitioning=Hash([s_suppkey@0], 16), input_partitions=16 | -| | HashJoinExec: mode=CollectLeft, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(n_nationkey@0, s_nationkey@1)], projection=[s_suppkey@1] | -| | ProjectionExec: expr=[n_nationkey@0 as n_nationkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={1 group: []}, projection=[n_nationkey, n_name], file_type=vortex, predicate: n_name@1 = CANADA | -| | HashJoinExec: mode=CollectLeft, join_type=RightSemi, accumulator=MinMaxLeftAccumulator, on=[(n_nationkey@0, s_nationkey@1)] | -| | ProjectionExec: expr=[n_nationkey@0 as n_nationkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={1 group: []}, projection=[n_nationkey, n_name], file_type=vortex, predicate: n_name@1 = CANADA | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[s_suppkey, s_nationkey], file_type=vortex | -| | RepartitionExec: partitioning=Hash([ps_suppkey@0], 16), input_partitions=16 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(ps_partkey@0, l_partkey@1), (ps_suppkey@1, l_suppkey@2)], filter=CAST(ps_availqty@0 AS Float64) > Float64(0.5) * sum(lineitem.l_quantity)@1, projection=[ps_suppkey@1] | -| | RepartitionExec: partitioning=Hash([ps_partkey@0, ps_suppkey@1], 16), input_partitions=16 | -| | HashJoinExec: mode=Partitioned, join_type=RightSemi, accumulator=MinMaxLeftAccumulator, on=[(p_partkey@0, ps_partkey@0)] | -| | RepartitionExec: partitioning=Hash([p_partkey@0], 16), input_partitions=16 | -| | ProjectionExec: expr=[p_partkey@0 as p_partkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[p_partkey, p_name], file_type=vortex, predicate: p_name@1 LIKE forest% | -| | RepartitionExec: partitioning=Hash([ps_partkey@0], 16), input_partitions=16 | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[ps_partkey, ps_suppkey, ps_availqty], file_type=vortex | -| | ProjectionExec: expr=[0.5 * CAST(sum(lineitem.l_quantity)@2 AS Float64) as Float64(0.5) * sum(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] | -| | AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] | -| | RepartitionExec: partitioning=Hash([l_partkey@0, l_suppkey@1], 16), input_partitions=16 | -| | AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] | -| | ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, l_quantity@2 as l_quantity] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], file_type=vortex, predicate: l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND DynamicFilter [ empty ] | -| | | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: supplier.s_name ASC NULLS LAST | +| | Projection: supplier.s_name, supplier.s_address | +| | LeftSemi Join: supplier.s_suppkey = __correlated_sq_2.ps_suppkey | +| | Projection: supplier.s_suppkey, supplier.s_name, supplier.s_address | +| | Inner Join: supplier.s_nationkey = nation.n_nationkey | +| | TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey] | +| | Projection: nation.n_nationkey | +| | Filter: nation.n_name = LargeUtf8("CANADA") | +| | TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = LargeUtf8("CANADA")] | +| | SubqueryAlias: __correlated_sq_2 | +| | Projection: partsupp.ps_suppkey | +| | Inner Join: partsupp.ps_partkey = __scalar_sq_3.l_partkey, partsupp.ps_suppkey = __scalar_sq_3.l_suppkey Filter: CAST(partsupp.ps_availqty AS Float64) > __scalar_sq_3.Float64(0.5) * sum(lineitem.l_quantity) | +| | LeftSemi Join: partsupp.ps_partkey = __correlated_sq_1.p_partkey | +| | TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty] | +| | SubqueryAlias: __correlated_sq_1 | +| | Projection: part.p_partkey | +| | Filter: part.p_name LIKE LargeUtf8("forest%") | +| | TableScan: part projection=[p_partkey, p_name], partial_filters=[part.p_name LIKE LargeUtf8("forest%")] | +| | SubqueryAlias: __scalar_sq_3 | +| | Projection: Float64(0.5) * CAST(sum(lineitem.l_quantity) AS Float64), lineitem.l_partkey, lineitem.l_suppkey | +| | Aggregate: groupBy=[[lineitem.l_partkey, lineitem.l_suppkey]], aggr=[[sum(lineitem.l_quantity)]] | +| | Projection: lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_quantity | +| | Filter: lineitem.l_shipdate >= Date32("1994-01-01") AND lineitem.l_shipdate < Date32("1995-01-01") | +| | TableScan: lineitem projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("1994-01-01"), lineitem.l_shipdate < Date32("1995-01-01")] | +| physical_plan | SortPreservingMergeExec: [s_name@0 ASC NULLS LAST] | +| | SortExec: expr=[s_name@0 ASC NULLS LAST], preserve_partitioning=[true] | +| | HashJoinExec: mode=Partitioned, join_type=LeftSemi, accumulator=MinMaxLeftAccumulator, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_name@1, s_address@2] | +| | RepartitionExec: partitioning=Hash([s_suppkey@0], 16), input_partitions=16 | +| | HashJoinExec: mode=CollectLeft, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(n_nationkey@0, s_nationkey@3)], projection=[s_suppkey@1, s_name@2, s_address@3] | +| | ProjectionExec: expr=[n_nationkey@0 as n_nationkey] | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={1 group: []}, projection=[n_nationkey, n_name], file_type=vortex, predicate: n_name@1 = CANADA | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[s_suppkey, s_name, s_address, s_nationkey], file_type=vortex, predicate: DynamicFilter [ empty ] | +| | RepartitionExec: partitioning=Hash([ps_suppkey@0], 16), input_partitions=16 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(ps_partkey@0, l_partkey@1), (ps_suppkey@1, l_suppkey@2)], filter=CAST(ps_availqty@0 AS Float64) > Float64(0.5) * sum(lineitem.l_quantity)@1, projection=[ps_suppkey@1] | +| | RepartitionExec: partitioning=Hash([ps_partkey@0, ps_suppkey@1], 16), input_partitions=16 | +| | HashJoinExec: mode=Partitioned, join_type=RightSemi, accumulator=MinMaxLeftAccumulator, on=[(p_partkey@0, ps_partkey@0)] | +| | RepartitionExec: partitioning=Hash([p_partkey@0], 16), input_partitions=16 | +| | ProjectionExec: expr=[p_partkey@0 as p_partkey] | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[p_partkey, p_name], file_type=vortex, predicate: p_name@1 LIKE forest% | +| | RepartitionExec: partitioning=Hash([ps_partkey@0], 16), input_partitions=16 | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[ps_partkey, ps_suppkey, ps_availqty], file_type=vortex | +| | ProjectionExec: expr=[0.5 * CAST(sum(lineitem.l_quantity)@2 AS Float64) as Float64(0.5) * sum(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] | +| | AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] | +| | RepartitionExec: partitioning=Hash([l_partkey@0, l_suppkey@1], 16), input_partitions=16 | +| | AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] | +| | ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, l_quantity@2 as l_quantity] | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], file_type=vortex, predicate: l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND DynamicFilter [ empty ] | +| | | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q21_explain_sf100.snap b/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q21_explain_sf100.snap index c017fafde7..a8329b8d0d 100644 --- a/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q21_explain_sf100.snap +++ b/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q21_explain_sf100.snap @@ -2,96 +2,84 @@ source: crates/test-framework/src/snapshot/mod.rs description: "Query: tpch_q21" --- -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: numwait DESC NULLS FIRST, supplier.s_name ASC NULLS LAST, fetch=100 | -| | Projection: supplier.s_name, count(Int64(1)) AS numwait | -| | Aggregate: groupBy=[[supplier.s_name]], aggr=[[count(Int64(1))]] | -| | Projection: supplier.s_name | -| | LeftAnti Join: l1.l_orderkey = __correlated_sq_2.l_orderkey Filter: __correlated_sq_2.l_suppkey != l1.l_suppkey | -| | LeftSemi Join: l1.l_orderkey = __correlated_sq_1.l_orderkey Filter: __correlated_sq_1.l_suppkey != l1.l_suppkey | -| | Projection: supplier.s_name, l1.l_orderkey, l1.l_suppkey | -| | Inner Join: supplier.s_nationkey = nation.n_nationkey | -| | LeftSemi Join: supplier.s_nationkey = __correlated_sq_4.n_nationkey | -| | Projection: supplier.s_name, supplier.s_nationkey, l1.l_orderkey, l1.l_suppkey | -| | Inner Join: l1.l_orderkey = orders.o_orderkey | -| | Projection: supplier.s_name, supplier.s_nationkey, l1.l_orderkey, l1.l_suppkey | -| | Inner Join: supplier.s_suppkey = l1.l_suppkey | -| | TableScan: supplier projection=[s_suppkey, s_name, s_nationkey] | -| | SubqueryAlias: l1 | -| | Projection: lineitem.l_orderkey, lineitem.l_suppkey | -| | Filter: lineitem.l_receiptdate > lineitem.l_commitdate | -| | TableScan: lineitem projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], partial_filters=[lineitem.l_receiptdate > lineitem.l_commitdate] | -| | Projection: orders.o_orderkey | -| | Filter: orders.o_orderstatus = LargeUtf8("F") | -| | TableScan: orders projection=[o_orderkey, o_orderstatus], partial_filters=[orders.o_orderstatus = LargeUtf8("F")] | -| | SubqueryAlias: __correlated_sq_4 | -| | SubqueryAlias: __cayenne_xclos___3 | -| | Projection: nation.n_nationkey | -| | Filter: nation.n_name = LargeUtf8("SAUDI ARABIA") | -| | TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = LargeUtf8("SAUDI ARABIA")] | -| | Projection: nation.n_nationkey | -| | Filter: nation.n_name = LargeUtf8("SAUDI ARABIA") | -| | TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = LargeUtf8("SAUDI ARABIA")] | -| | SubqueryAlias: __correlated_sq_1 | -| | SubqueryAlias: l2 | -| | TableScan: lineitem projection=[l_orderkey, l_suppkey] | -| | SubqueryAlias: __correlated_sq_2 | -| | SubqueryAlias: l3 | -| | Projection: lineitem.l_orderkey, lineitem.l_suppkey | -| | Filter: lineitem.l_receiptdate > lineitem.l_commitdate | -| | TableScan: lineitem projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], partial_filters=[lineitem.l_receiptdate > lineitem.l_commitdate] | -| physical_plan | SortPreservingMergeExec: [numwait@1 DESC, s_name@0 ASC NULLS LAST], fetch=100 | -| | SortExec: TopK(fetch=100), expr=[numwait@1 DESC, s_name@0 ASC NULLS LAST], preserve_partitioning=[true] | -| | ProjectionExec: expr=[s_name@0 as s_name, count(Int64(1))@1 as numwait] | -| | AggregateExec: mode=FinalPartitioned, gby=[s_name@0 as s_name], aggr=[count(Int64(1))] | -| | RepartitionExec: partitioning=Hash([s_name@0], 16), input_partitions=16 | -| | AggregateExec: mode=Partial, gby=[s_name@0 as s_name], aggr=[count(Int64(1))] | -| | HashJoinExec: mode=Partitioned, join_type=LeftAnti, accumulator=MinMaxLeftAccumulator, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0, projection=[s_name@0] | -| | HashJoinExec: mode=Partitioned, join_type=LeftSemi, accumulator=MinMaxLeftAccumulator, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0 | -| | HashJoinExec: mode=CollectLeft, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(n_nationkey@0, s_nationkey@1)], projection=[s_name@1, l_orderkey@3, l_suppkey@4] | -| | ProjectionExec: expr=[n_nationkey@0 as n_nationkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={1 group: []}, projection=[n_nationkey, n_name], file_type=vortex, predicate: n_name@1 = SAUDI ARABIA | -| | HashJoinExec: mode=CollectLeft, join_type=RightSemi, accumulator=MinMaxLeftAccumulator, on=[(n_nationkey@0, s_nationkey@1)] | -| | ProjectionExec: expr=[n_nationkey@0 as n_nationkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={1 group: []}, projection=[n_nationkey, n_name], file_type=vortex, predicate: n_name@1 = SAUDI ARABIA | -| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(o_orderkey@0, l_orderkey@2)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] | -| | RepartitionExec: partitioning=Hash([o_orderkey@0], 16), input_partitions=16 | -| | ProjectionExec: expr=[o_orderkey@0 as o_orderkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[o_orderkey, o_orderstatus], file_type=vortex, predicate: o_orderstatus@2 = F | -| | RepartitionExec: partitioning=Hash([l_orderkey@2], 16), input_partitions=16 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] | -| | RepartitionExec: partitioning=Hash([s_suppkey@0], 16), input_partitions=16 | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[s_suppkey, s_name, s_nationkey], file_type=vortex | -| | RepartitionExec: partitioning=Hash([l_suppkey@1], 16), input_partitions=16 | -| | ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], file_type=vortex, predicate: l_receiptdate@12 > l_commitdate@11 AND DynamicFilter [ empty ] AND DynamicFilter [ empty ] | -| | RepartitionExec: partitioning=Hash([l_orderkey@0], 16), input_partitions=16 | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[l_orderkey, l_suppkey], file_type=vortex | -| | RepartitionExec: partitioning=Hash([l_orderkey@0], 16), input_partitions=16 | -| | ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], file_type=vortex, predicate: l_receiptdate@12 > l_commitdate@11 | -| | | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: numwait DESC NULLS FIRST, supplier.s_name ASC NULLS LAST, fetch=100 | +| | Projection: supplier.s_name, count(Int64(1)) AS numwait | +| | Aggregate: groupBy=[[supplier.s_name]], aggr=[[count(Int64(1))]] | +| | Projection: supplier.s_name | +| | LeftAnti Join: l1.l_orderkey = __correlated_sq_2.l_orderkey Filter: __correlated_sq_2.l_suppkey != l1.l_suppkey | +| | LeftSemi Join: l1.l_orderkey = __correlated_sq_1.l_orderkey Filter: __correlated_sq_1.l_suppkey != l1.l_suppkey | +| | Projection: supplier.s_name, l1.l_orderkey, l1.l_suppkey | +| | Inner Join: supplier.s_nationkey = nation.n_nationkey | +| | Projection: supplier.s_name, supplier.s_nationkey, l1.l_orderkey, l1.l_suppkey | +| | Inner Join: l1.l_orderkey = orders.o_orderkey | +| | Projection: supplier.s_name, supplier.s_nationkey, l1.l_orderkey, l1.l_suppkey | +| | Inner Join: supplier.s_suppkey = l1.l_suppkey | +| | TableScan: supplier projection=[s_suppkey, s_name, s_nationkey] | +| | SubqueryAlias: l1 | +| | Projection: lineitem.l_orderkey, lineitem.l_suppkey | +| | Filter: lineitem.l_receiptdate > lineitem.l_commitdate | +| | TableScan: lineitem projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], partial_filters=[lineitem.l_receiptdate > lineitem.l_commitdate] | +| | Projection: orders.o_orderkey | +| | Filter: orders.o_orderstatus = LargeUtf8("F") | +| | TableScan: orders projection=[o_orderkey, o_orderstatus], partial_filters=[orders.o_orderstatus = LargeUtf8("F")] | +| | Projection: nation.n_nationkey | +| | Filter: nation.n_name = LargeUtf8("SAUDI ARABIA") | +| | TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = LargeUtf8("SAUDI ARABIA")] | +| | SubqueryAlias: __correlated_sq_1 | +| | SubqueryAlias: l2 | +| | TableScan: lineitem projection=[l_orderkey, l_suppkey] | +| | SubqueryAlias: __correlated_sq_2 | +| | SubqueryAlias: l3 | +| | Projection: lineitem.l_orderkey, lineitem.l_suppkey | +| | Filter: lineitem.l_receiptdate > lineitem.l_commitdate | +| | TableScan: lineitem projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], partial_filters=[lineitem.l_receiptdate > lineitem.l_commitdate] | +| physical_plan | SortPreservingMergeExec: [numwait@1 DESC, s_name@0 ASC NULLS LAST], fetch=100 | +| | SortExec: TopK(fetch=100), expr=[numwait@1 DESC, s_name@0 ASC NULLS LAST], preserve_partitioning=[true] | +| | ProjectionExec: expr=[s_name@0 as s_name, count(Int64(1))@1 as numwait] | +| | AggregateExec: mode=FinalPartitioned, gby=[s_name@0 as s_name], aggr=[count(Int64(1))] | +| | RepartitionExec: partitioning=Hash([s_name@0], 16), input_partitions=16 | +| | AggregateExec: mode=Partial, gby=[s_name@0 as s_name], aggr=[count(Int64(1))] | +| | HashJoinExec: mode=Partitioned, join_type=LeftAnti, accumulator=MinMaxLeftAccumulator, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0, projection=[s_name@0] | +| | HashJoinExec: mode=Partitioned, join_type=LeftSemi, accumulator=MinMaxLeftAccumulator, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0 | +| | HashJoinExec: mode=CollectLeft, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(n_nationkey@0, s_nationkey@1)], projection=[s_name@1, l_orderkey@3, l_suppkey@4] | +| | ProjectionExec: expr=[n_nationkey@0 as n_nationkey] | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={1 group: []}, projection=[n_nationkey, n_name], file_type=vortex, predicate: n_name@1 = SAUDI ARABIA | +| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(o_orderkey@0, l_orderkey@2)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] | +| | RepartitionExec: partitioning=Hash([o_orderkey@0], 16), input_partitions=16 | +| | ProjectionExec: expr=[o_orderkey@0 as o_orderkey] | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[o_orderkey, o_orderstatus], file_type=vortex, predicate: o_orderstatus@2 = F | +| | RepartitionExec: partitioning=Hash([l_orderkey@2], 16), input_partitions=16 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] | +| | RepartitionExec: partitioning=Hash([s_suppkey@0], 16), input_partitions=16 | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[s_suppkey, s_name, s_nationkey], file_type=vortex, predicate: DynamicFilter [ empty ] | +| | RepartitionExec: partitioning=Hash([l_suppkey@1], 16), input_partitions=16 | +| | ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], file_type=vortex, predicate: l_receiptdate@12 > l_commitdate@11 AND DynamicFilter [ empty ] AND DynamicFilter [ empty ] | +| | RepartitionExec: partitioning=Hash([l_orderkey@0], 16), input_partitions=16 | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[l_orderkey, l_suppkey], file_type=vortex | +| | RepartitionExec: partitioning=Hash([l_orderkey@0], 16), input_partitions=16 | +| | ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], file_type=vortex, predicate: l_receiptdate@12 > l_commitdate@11 | +| | | ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q2_explain_sf100.snap b/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q2_explain_sf100.snap index a39d3c7729..7f4eb93000 100644 --- a/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q2_explain_sf100.snap +++ b/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q2_explain_sf100.snap @@ -2,133 +2,109 @@ source: crates/test-framework/src/snapshot/mod.rs description: "Query: tpch_q2" --- -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: supplier.s_acctbal DESC NULLS FIRST, nation.n_name ASC NULLS LAST, supplier.s_name ASC NULLS LAST, part.p_partkey ASC NULLS LAST, fetch=100 | -| | Projection: supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment | -| | Inner Join: part.p_partkey = __scalar_sq_1.ps_partkey, partsupp.ps_supplycost = __scalar_sq_1.min(partsupp.ps_supplycost) | -| | Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, partsupp.ps_supplycost, nation.n_name | -| | Inner Join: nation.n_regionkey = region.r_regionkey | -| | LeftSemi Join: nation.n_regionkey = __correlated_sq_5.r_regionkey | -| | Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, partsupp.ps_supplycost, nation.n_name, nation.n_regionkey | -| | Inner Join: supplier.s_nationkey = nation.n_nationkey | -| | Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_nationkey, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, partsupp.ps_supplycost | -| | Inner Join: partsupp.ps_suppkey = supplier.s_suppkey | -| | Projection: part.p_partkey, part.p_mfgr, partsupp.ps_suppkey, partsupp.ps_supplycost | -| | Inner Join: part.p_partkey = partsupp.ps_partkey | -| | Projection: part.p_partkey, part.p_mfgr | -| | Filter: part.p_size = Int32(15) AND part.p_type LIKE LargeUtf8("%BRASS") | -| | TableScan: part projection=[p_partkey, p_mfgr, p_type, p_size], partial_filters=[part.p_size = Int32(15), part.p_type LIKE LargeUtf8("%BRASS")] | -| | TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost] | -| | TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] | -| | TableScan: nation projection=[n_nationkey, n_name, n_regionkey] | -| | SubqueryAlias: __correlated_sq_5 | -| | SubqueryAlias: __cayenne_xclos___4 | -| | Projection: region.r_regionkey | -| | Filter: region.r_name = LargeUtf8("EUROPE") | -| | TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = LargeUtf8("EUROPE")] | -| | Projection: region.r_regionkey | -| | Filter: region.r_name = LargeUtf8("EUROPE") | -| | TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = LargeUtf8("EUROPE")] | -| | SubqueryAlias: __scalar_sq_1 | -| | Projection: min(partsupp.ps_supplycost), partsupp.ps_partkey | -| | Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[min(partsupp.ps_supplycost)]] | -| | Projection: partsupp.ps_partkey, partsupp.ps_supplycost | -| | Inner Join: nation.n_regionkey = region.r_regionkey | -| | LeftSemi Join: nation.n_regionkey = __correlated_sq_3.r_regionkey | -| | Projection: partsupp.ps_partkey, partsupp.ps_supplycost, nation.n_regionkey | -| | Inner Join: supplier.s_nationkey = nation.n_nationkey | -| | Projection: partsupp.ps_partkey, partsupp.ps_supplycost, supplier.s_nationkey | -| | Inner Join: partsupp.ps_suppkey = supplier.s_suppkey | -| | TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost] | -| | TableScan: supplier projection=[s_suppkey, s_nationkey] | -| | TableScan: nation projection=[n_nationkey, n_regionkey] | -| | SubqueryAlias: __correlated_sq_3 | -| | SubqueryAlias: __cayenne_xclos___2 | -| | Projection: region.r_regionkey | -| | Filter: region.r_name = LargeUtf8("EUROPE") | -| | TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = LargeUtf8("EUROPE")] | -| | Projection: region.r_regionkey | -| | Filter: region.r_name = LargeUtf8("EUROPE") | -| | TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = LargeUtf8("EUROPE")] | -| physical_plan | SortPreservingMergeExec: [s_acctbal@0 DESC, n_name@2 ASC NULLS LAST, s_name@1 ASC NULLS LAST, p_partkey@3 ASC NULLS LAST], fetch=100 | -| | SortExec: TopK(fetch=100), expr=[s_acctbal@0 DESC, n_name@2 ASC NULLS LAST, s_name@1 ASC NULLS LAST, p_partkey@3 ASC NULLS LAST], preserve_partitioning=[true] | -| | ProjectionExec: expr=[s_acctbal@5 as s_acctbal, s_name@2 as s_name, n_name@7 as n_name, p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_address@3 as s_address, s_phone@4 as s_phone, s_comment@6 as s_comment] | -| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(p_partkey@0, ps_partkey@1), (ps_supplycost@7, min(partsupp.ps_supplycost)@0)], projection=[p_partkey@0, p_mfgr@1, s_name@2, s_address@3, s_phone@4, s_acctbal@5, s_comment@6, n_name@8] | -| | RepartitionExec: partitioning=Hash([p_partkey@0, ps_supplycost@7], 16), input_partitions=16 | -| | HashJoinExec: mode=CollectLeft, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(r_regionkey@0, n_regionkey@9)], projection=[p_partkey@1, p_mfgr@2, s_name@3, s_address@4, s_phone@5, s_acctbal@6, s_comment@7, ps_supplycost@8, n_name@9] | -| | ProjectionExec: expr=[r_regionkey@0 as r_regionkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={1 group: []}, projection=[r_regionkey, r_name], file_type=vortex, predicate: r_name@1 = EUROPE | -| | HashJoinExec: mode=CollectLeft, join_type=RightSemi, accumulator=MinMaxLeftAccumulator, on=[(r_regionkey@0, n_regionkey@9)] | -| | ProjectionExec: expr=[r_regionkey@0 as r_regionkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={1 group: []}, projection=[r_regionkey, r_name], file_type=vortex, predicate: r_name@1 = EUROPE | -| | ProjectionExec: expr=[p_partkey@2 as p_partkey, p_mfgr@3 as p_mfgr, s_name@4 as s_name, s_address@5 as s_address, s_phone@6 as s_phone, s_acctbal@7 as s_acctbal, s_comment@8 as s_comment, ps_supplycost@9 as ps_supplycost, n_name@0 as n_name, n_regionkey@1 as n_regionkey] | -| | HashJoinExec: mode=CollectLeft, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(n_nationkey@0, s_nationkey@4)], projection=[n_name@1, n_regionkey@2, p_partkey@3, p_mfgr@4, s_name@5, s_address@6, s_phone@8, s_acctbal@9, s_comment@10, ps_supplycost@11] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={1 group: []}, projection=[n_nationkey, n_name, n_regionkey], file_type=vortex | -| | ProjectionExec: expr=[p_partkey@6 as p_partkey, p_mfgr@7 as p_mfgr, s_name@0 as s_name, s_address@1 as s_address, s_nationkey@2 as s_nationkey, s_phone@3 as s_phone, s_acctbal@4 as s_acctbal, s_comment@5 as s_comment, ps_supplycost@8 as ps_supplycost] | -| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(s_suppkey@0, ps_suppkey@2)], projection=[s_name@1, s_address@2, s_nationkey@3, s_phone@4, s_acctbal@5, s_comment@6, p_partkey@7, p_mfgr@8, ps_supplycost@10] | -| | RepartitionExec: partitioning=Hash([s_suppkey@0], 16), input_partitions=16 | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment], file_type=vortex, predicate: DynamicFilter [ empty ] | -| | RepartitionExec: partitioning=Hash([ps_suppkey@2], 16), input_partitions=16 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(p_partkey@0, ps_partkey@0)], projection=[p_partkey@0, p_mfgr@1, ps_suppkey@3, ps_supplycost@4] | -| | RepartitionExec: partitioning=Hash([p_partkey@0], 16), input_partitions=16 | -| | ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[p_partkey, p_mfgr, p_type, p_size], file_type=vortex, predicate: p_size@5 = 15 AND p_type@4 LIKE %BRASS | -| | RepartitionExec: partitioning=Hash([ps_partkey@0], 16), input_partitions=16 | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[ps_partkey, ps_suppkey, ps_supplycost], file_type=vortex, predicate: DynamicFilter [ empty ] AND DynamicFilter [ empty ] | -| | RepartitionExec: partitioning=Hash([ps_partkey@1, min(partsupp.ps_supplycost)@0], 16), input_partitions=16 | -| | ProjectionExec: expr=[min(partsupp.ps_supplycost)@1 as min(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] | -| | AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] | -| | RepartitionExec: partitioning=Hash([ps_partkey@0], 16), input_partitions=16 | -| | AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] | -| | HashJoinExec: mode=CollectLeft, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(r_regionkey@0, n_regionkey@2)], projection=[ps_partkey@1, ps_supplycost@2] | -| | ProjectionExec: expr=[r_regionkey@0 as r_regionkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={1 group: []}, projection=[r_regionkey, r_name], file_type=vortex, predicate: r_name@1 = EUROPE | -| | HashJoinExec: mode=CollectLeft, join_type=RightSemi, accumulator=MinMaxLeftAccumulator, on=[(r_regionkey@0, n_regionkey@2)] | -| | ProjectionExec: expr=[r_regionkey@0 as r_regionkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={1 group: []}, projection=[r_regionkey, r_name], file_type=vortex, predicate: r_name@1 = EUROPE | -| | ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, n_regionkey@0 as n_regionkey] | -| | HashJoinExec: mode=CollectLeft, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_regionkey@1, ps_partkey@2, ps_supplycost@3] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={1 group: []}, projection=[n_nationkey, n_regionkey], file_type=vortex | -| | ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] | -| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_supplycost@4] | -| | RepartitionExec: partitioning=Hash([s_suppkey@0], 16), input_partitions=16 | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[s_suppkey, s_nationkey], file_type=vortex, predicate: DynamicFilter [ empty ] | -| | RepartitionExec: partitioning=Hash([ps_suppkey@1], 16), input_partitions=16 | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[ps_partkey, ps_suppkey, ps_supplycost], file_type=vortex, predicate: DynamicFilter [ empty ] | -| | | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: supplier.s_acctbal DESC NULLS FIRST, nation.n_name ASC NULLS LAST, supplier.s_name ASC NULLS LAST, part.p_partkey ASC NULLS LAST, fetch=100 | +| | Projection: supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment | +| | Inner Join: part.p_partkey = __scalar_sq_1.ps_partkey, partsupp.ps_supplycost = __scalar_sq_1.min(partsupp.ps_supplycost) | +| | Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, partsupp.ps_supplycost, nation.n_name | +| | Inner Join: nation.n_regionkey = region.r_regionkey | +| | Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, partsupp.ps_supplycost, nation.n_name, nation.n_regionkey | +| | Inner Join: supplier.s_nationkey = nation.n_nationkey | +| | Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_nationkey, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, partsupp.ps_supplycost | +| | Inner Join: partsupp.ps_suppkey = supplier.s_suppkey | +| | Projection: part.p_partkey, part.p_mfgr, partsupp.ps_suppkey, partsupp.ps_supplycost | +| | Inner Join: part.p_partkey = partsupp.ps_partkey | +| | Projection: part.p_partkey, part.p_mfgr | +| | Filter: part.p_size = Int32(15) AND part.p_type LIKE LargeUtf8("%BRASS") | +| | TableScan: part projection=[p_partkey, p_mfgr, p_type, p_size], partial_filters=[part.p_size = Int32(15), part.p_type LIKE LargeUtf8("%BRASS")] | +| | TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost] | +| | TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] | +| | TableScan: nation projection=[n_nationkey, n_name, n_regionkey] | +| | Projection: region.r_regionkey | +| | Filter: region.r_name = LargeUtf8("EUROPE") | +| | TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = LargeUtf8("EUROPE")] | +| | SubqueryAlias: __scalar_sq_1 | +| | Projection: min(partsupp.ps_supplycost), partsupp.ps_partkey | +| | Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[min(partsupp.ps_supplycost)]] | +| | Projection: partsupp.ps_partkey, partsupp.ps_supplycost | +| | Inner Join: nation.n_regionkey = region.r_regionkey | +| | Projection: partsupp.ps_partkey, partsupp.ps_supplycost, nation.n_regionkey | +| | Inner Join: supplier.s_nationkey = nation.n_nationkey | +| | Projection: partsupp.ps_partkey, partsupp.ps_supplycost, supplier.s_nationkey | +| | Inner Join: partsupp.ps_suppkey = supplier.s_suppkey | +| | TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost] | +| | TableScan: supplier projection=[s_suppkey, s_nationkey] | +| | TableScan: nation projection=[n_nationkey, n_regionkey] | +| | Projection: region.r_regionkey | +| | Filter: region.r_name = LargeUtf8("EUROPE") | +| | TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = LargeUtf8("EUROPE")] | +| physical_plan | SortPreservingMergeExec: [s_acctbal@0 DESC, n_name@2 ASC NULLS LAST, s_name@1 ASC NULLS LAST, p_partkey@3 ASC NULLS LAST], fetch=100 | +| | SortExec: TopK(fetch=100), expr=[s_acctbal@0 DESC, n_name@2 ASC NULLS LAST, s_name@1 ASC NULLS LAST, p_partkey@3 ASC NULLS LAST], preserve_partitioning=[true] | +| | ProjectionExec: expr=[s_acctbal@5 as s_acctbal, s_name@2 as s_name, n_name@7 as n_name, p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_address@3 as s_address, s_phone@4 as s_phone, s_comment@6 as s_comment] | +| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(p_partkey@0, ps_partkey@1), (ps_supplycost@7, min(partsupp.ps_supplycost)@0)], projection=[p_partkey@0, p_mfgr@1, s_name@2, s_address@3, s_phone@4, s_acctbal@5, s_comment@6, n_name@8] | +| | RepartitionExec: partitioning=Hash([p_partkey@0, ps_supplycost@7], 16), input_partitions=16 | +| | HashJoinExec: mode=CollectLeft, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(r_regionkey@0, n_regionkey@9)], projection=[p_partkey@1, p_mfgr@2, s_name@3, s_address@4, s_phone@5, s_acctbal@6, s_comment@7, ps_supplycost@8, n_name@9] | +| | ProjectionExec: expr=[r_regionkey@0 as r_regionkey] | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={1 group: []}, projection=[r_regionkey, r_name], file_type=vortex, predicate: r_name@1 = EUROPE | +| | ProjectionExec: expr=[p_partkey@2 as p_partkey, p_mfgr@3 as p_mfgr, s_name@4 as s_name, s_address@5 as s_address, s_phone@6 as s_phone, s_acctbal@7 as s_acctbal, s_comment@8 as s_comment, ps_supplycost@9 as ps_supplycost, n_name@0 as n_name, n_regionkey@1 as n_regionkey] | +| | HashJoinExec: mode=CollectLeft, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(n_nationkey@0, s_nationkey@4)], projection=[n_name@1, n_regionkey@2, p_partkey@3, p_mfgr@4, s_name@5, s_address@6, s_phone@8, s_acctbal@9, s_comment@10, ps_supplycost@11] | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={1 group: []}, projection=[n_nationkey, n_name, n_regionkey], file_type=vortex, predicate: DynamicFilter [ empty ] | +| | ProjectionExec: expr=[p_partkey@6 as p_partkey, p_mfgr@7 as p_mfgr, s_name@0 as s_name, s_address@1 as s_address, s_nationkey@2 as s_nationkey, s_phone@3 as s_phone, s_acctbal@4 as s_acctbal, s_comment@5 as s_comment, ps_supplycost@8 as ps_supplycost] | +| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(s_suppkey@0, ps_suppkey@2)], projection=[s_name@1, s_address@2, s_nationkey@3, s_phone@4, s_acctbal@5, s_comment@6, p_partkey@7, p_mfgr@8, ps_supplycost@10] | +| | RepartitionExec: partitioning=Hash([s_suppkey@0], 16), input_partitions=16 | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment], file_type=vortex, predicate: DynamicFilter [ empty ] | +| | RepartitionExec: partitioning=Hash([ps_suppkey@2], 16), input_partitions=16 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(p_partkey@0, ps_partkey@0)], projection=[p_partkey@0, p_mfgr@1, ps_suppkey@3, ps_supplycost@4] | +| | RepartitionExec: partitioning=Hash([p_partkey@0], 16), input_partitions=16 | +| | ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr] | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[p_partkey, p_mfgr, p_type, p_size], file_type=vortex, predicate: p_size@5 = 15 AND p_type@4 LIKE %BRASS | +| | RepartitionExec: partitioning=Hash([ps_partkey@0], 16), input_partitions=16 | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[ps_partkey, ps_suppkey, ps_supplycost], file_type=vortex, predicate: DynamicFilter [ empty ] AND DynamicFilter [ empty ] | +| | RepartitionExec: partitioning=Hash([ps_partkey@1, min(partsupp.ps_supplycost)@0], 16), input_partitions=16 | +| | ProjectionExec: expr=[min(partsupp.ps_supplycost)@1 as min(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] | +| | AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] | +| | RepartitionExec: partitioning=Hash([ps_partkey@0], 16), input_partitions=16 | +| | AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] | +| | HashJoinExec: mode=CollectLeft, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(r_regionkey@0, n_regionkey@2)], projection=[ps_partkey@1, ps_supplycost@2] | +| | ProjectionExec: expr=[r_regionkey@0 as r_regionkey] | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={1 group: []}, projection=[r_regionkey, r_name], file_type=vortex, predicate: r_name@1 = EUROPE | +| | ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, n_regionkey@0 as n_regionkey] | +| | HashJoinExec: mode=CollectLeft, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_regionkey@1, ps_partkey@2, ps_supplycost@3] | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={1 group: []}, projection=[n_nationkey, n_regionkey], file_type=vortex, predicate: DynamicFilter [ empty ] | +| | ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] | +| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_supplycost@4] | +| | RepartitionExec: partitioning=Hash([s_suppkey@0], 16), input_partitions=16 | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[s_suppkey, s_nationkey], file_type=vortex, predicate: DynamicFilter [ empty ] | +| | RepartitionExec: partitioning=Hash([ps_suppkey@1], 16), input_partitions=16 | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[ps_partkey, ps_suppkey, ps_supplycost], file_type=vortex, predicate: DynamicFilter [ empty ] | +| | | ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q3_explain_sf100.snap b/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q3_explain_sf100.snap index 25bf7ba693..4db54ed825 100644 --- a/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q3_explain_sf100.snap +++ b/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q3_explain_sf100.snap @@ -15,14 +15,8 @@ description: "Query: tpch_q3" | | Projection: customer.c_custkey | | | Filter: customer.c_mktsegment = LargeUtf8("BUILDING") | | | TableScan: customer projection=[c_custkey, c_mktsegment], partial_filters=[customer.c_mktsegment = LargeUtf8("BUILDING")] | -| | LeftSemi Join: orders.o_custkey = __correlated_sq_2.c_custkey | -| | Filter: orders.o_orderdate < Date32("1995-03-15") | -| | TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], partial_filters=[orders.o_orderdate < Date32("1995-03-15")] | -| | SubqueryAlias: __correlated_sq_2 | -| | SubqueryAlias: __cayenne_xclos___1 | -| | Projection: customer.c_custkey | -| | Filter: customer.c_mktsegment = LargeUtf8("BUILDING") | -| | TableScan: customer projection=[c_custkey, c_mktsegment], partial_filters=[customer.c_mktsegment = LargeUtf8("BUILDING")] | +| | Filter: orders.o_orderdate < Date32("1995-03-15") | +| | TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], partial_filters=[orders.o_orderdate < Date32("1995-03-15")] | | | Projection: lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount | | | Filter: lineitem.l_shipdate > Date32("1995-03-15") | | | TableScan: lineitem projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate > Date32("1995-03-15")] | @@ -39,18 +33,11 @@ description: "Query: tpch_q3" | | CayenneAccelerationExec | | | BytesProcessedExec | | | DataSourceExec: file_groups={16 groups: []}, projection=[c_custkey, c_mktsegment], file_type=vortex, predicate: c_mktsegment@6 = BUILDING | -| | HashJoinExec: mode=Partitioned, join_type=RightSemi, accumulator=MinMaxLeftAccumulator, on=[(c_custkey@0, o_custkey@1)] | -| | RepartitionExec: partitioning=Hash([c_custkey@0], 16), input_partitions=16 | -| | ProjectionExec: expr=[c_custkey@0 as c_custkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[c_custkey, c_mktsegment], file_type=vortex, predicate: c_mktsegment@6 = BUILDING | -| | RepartitionExec: partitioning=Hash([o_custkey@1], 16), input_partitions=16 | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], file_type=vortex, predicate: o_orderdate@4 < 1995-03-15 | +| | RepartitionExec: partitioning=Hash([o_custkey@1], 16), input_partitions=16 | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], file_type=vortex, predicate: o_orderdate@4 < 1995-03-15 AND DynamicFilter [ empty ] | | | RepartitionExec: partitioning=Hash([l_orderkey@0], 16), input_partitions=16 | | | ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] | | | SchemaCastScanExec | diff --git a/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q5_explain_sf100.snap b/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q5_explain_sf100.snap index 604f64ddcc..d08d85fcfc 100644 --- a/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q5_explain_sf100.snap +++ b/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q5_explain_sf100.snap @@ -2,88 +2,76 @@ source: crates/test-framework/src/snapshot/mod.rs description: "Query: tpch_q5" --- -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: revenue DESC NULLS FIRST | -| | Projection: nation.n_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue | -| | Aggregate: groupBy=[[nation.n_name]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] | -| | Projection: lineitem.l_extendedprice, lineitem.l_discount, nation.n_name | -| | Inner Join: nation.n_regionkey = region.r_regionkey | -| | LeftSemi Join: nation.n_regionkey = __correlated_sq_2.r_regionkey | -| | Projection: lineitem.l_extendedprice, lineitem.l_discount, nation.n_name, nation.n_regionkey | -| | Inner Join: supplier.s_nationkey = nation.n_nationkey | -| | Projection: lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey | -| | Inner Join: lineitem.l_suppkey = supplier.s_suppkey, customer.c_nationkey = supplier.s_nationkey | -| | Projection: customer.c_nationkey, lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount | -| | Inner Join: orders.o_orderkey = lineitem.l_orderkey | -| | Projection: customer.c_nationkey, orders.o_orderkey | -| | Inner Join: customer.c_custkey = orders.o_custkey | -| | TableScan: customer projection=[c_custkey, c_nationkey] | -| | Projection: orders.o_orderkey, orders.o_custkey | -| | Filter: orders.o_orderdate >= Date32("1994-01-01") AND orders.o_orderdate < Date32("1995-01-01") | -| | TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate], partial_filters=[orders.o_orderdate >= Date32("1994-01-01"), orders.o_orderdate < Date32("1995-01-01")] | -| | TableScan: lineitem projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] | -| | TableScan: supplier projection=[s_suppkey, s_nationkey] | -| | TableScan: nation projection=[n_nationkey, n_name, n_regionkey] | -| | SubqueryAlias: __correlated_sq_2 | -| | SubqueryAlias: __cayenne_xclos___1 | -| | Projection: region.r_regionkey | -| | Filter: region.r_name = LargeUtf8("ASIA") | -| | TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = LargeUtf8("ASIA")] | -| | Projection: region.r_regionkey | -| | Filter: region.r_name = LargeUtf8("ASIA") | -| | TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = LargeUtf8("ASIA")] | -| physical_plan | SortPreservingMergeExec: [revenue@1 DESC] | -| | SortExec: expr=[revenue@1 DESC], preserve_partitioning=[true] | -| | ProjectionExec: expr=[n_name@0 as n_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as revenue] | -| | AggregateExec: mode=FinalPartitioned, gby=[n_name@0 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | -| | RepartitionExec: partitioning=Hash([n_name@0], 16), input_partitions=16 | -| | AggregateExec: mode=Partial, gby=[n_name@2 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | -| | HashJoinExec: mode=CollectLeft, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, n_name@3] | -| | ProjectionExec: expr=[r_regionkey@0 as r_regionkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={1 group: []}, projection=[r_regionkey, r_name], file_type=vortex, predicate: r_name@1 = ASIA | -| | HashJoinExec: mode=CollectLeft, join_type=RightSemi, accumulator=MinMaxLeftAccumulator, on=[(r_regionkey@0, n_regionkey@3)] | -| | ProjectionExec: expr=[r_regionkey@0 as r_regionkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={1 group: []}, projection=[r_regionkey, r_name], file_type=vortex, predicate: r_name@1 = ASIA | -| | ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, n_name@0 as n_name, n_regionkey@1 as n_regionkey] | -| | HashJoinExec: mode=CollectLeft, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, n_regionkey@2, l_extendedprice@3, l_discount@4] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={1 group: []}, projection=[n_nationkey, n_name, n_regionkey], file_type=vortex | -| | ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@0 as s_nationkey] | -| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(s_suppkey@0, l_suppkey@1), (s_nationkey@1, c_nationkey@0)], projection=[s_nationkey@1, l_extendedprice@4, l_discount@5] | -| | RepartitionExec: partitioning=Hash([s_suppkey@0, s_nationkey@1], 16), input_partitions=16 | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[s_suppkey, s_nationkey], file_type=vortex, predicate: DynamicFilter [ empty ] | -| | RepartitionExec: partitioning=Hash([l_suppkey@1, c_nationkey@0], 16), input_partitions=16 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(o_orderkey@1, l_orderkey@0)], projection=[c_nationkey@0, l_suppkey@3, l_extendedprice@4, l_discount@5] | -| | RepartitionExec: partitioning=Hash([o_orderkey@1], 16), input_partitions=16 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(c_custkey@0, o_custkey@1)], projection=[c_nationkey@1, o_orderkey@2] | -| | RepartitionExec: partitioning=Hash([c_custkey@0], 16), input_partitions=16 | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[c_custkey, c_nationkey], file_type=vortex | -| | RepartitionExec: partitioning=Hash([o_custkey@1], 16), input_partitions=16 | -| | ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[o_orderkey, o_custkey, o_orderdate], file_type=vortex, predicate: o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01 AND DynamicFilter [ empty ] | -| | RepartitionExec: partitioning=Hash([l_orderkey@0], 16), input_partitions=16 | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount], file_type=vortex, predicate: DynamicFilter [ empty ] | -| | | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: revenue DESC NULLS FIRST | +| | Projection: nation.n_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue | +| | Aggregate: groupBy=[[nation.n_name]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] | +| | Projection: lineitem.l_extendedprice, lineitem.l_discount, nation.n_name | +| | Inner Join: nation.n_regionkey = region.r_regionkey | +| | Projection: lineitem.l_extendedprice, lineitem.l_discount, nation.n_name, nation.n_regionkey | +| | Inner Join: supplier.s_nationkey = nation.n_nationkey | +| | Projection: lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey | +| | Inner Join: lineitem.l_suppkey = supplier.s_suppkey, customer.c_nationkey = supplier.s_nationkey | +| | Projection: customer.c_nationkey, lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount | +| | Inner Join: orders.o_orderkey = lineitem.l_orderkey | +| | Projection: customer.c_nationkey, orders.o_orderkey | +| | Inner Join: customer.c_custkey = orders.o_custkey | +| | TableScan: customer projection=[c_custkey, c_nationkey] | +| | Projection: orders.o_orderkey, orders.o_custkey | +| | Filter: orders.o_orderdate >= Date32("1994-01-01") AND orders.o_orderdate < Date32("1995-01-01") | +| | TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate], partial_filters=[orders.o_orderdate >= Date32("1994-01-01"), orders.o_orderdate < Date32("1995-01-01")] | +| | TableScan: lineitem projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] | +| | TableScan: supplier projection=[s_suppkey, s_nationkey] | +| | TableScan: nation projection=[n_nationkey, n_name, n_regionkey] | +| | Projection: region.r_regionkey | +| | Filter: region.r_name = LargeUtf8("ASIA") | +| | TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = LargeUtf8("ASIA")] | +| physical_plan | SortPreservingMergeExec: [revenue@1 DESC] | +| | SortExec: expr=[revenue@1 DESC], preserve_partitioning=[true] | +| | ProjectionExec: expr=[n_name@0 as n_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as revenue] | +| | AggregateExec: mode=FinalPartitioned, gby=[n_name@0 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | +| | RepartitionExec: partitioning=Hash([n_name@0], 16), input_partitions=16 | +| | AggregateExec: mode=Partial, gby=[n_name@2 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | +| | HashJoinExec: mode=CollectLeft, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, n_name@3] | +| | ProjectionExec: expr=[r_regionkey@0 as r_regionkey] | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={1 group: []}, projection=[r_regionkey, r_name], file_type=vortex, predicate: r_name@1 = ASIA | +| | ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, n_name@0 as n_name, n_regionkey@1 as n_regionkey] | +| | HashJoinExec: mode=CollectLeft, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, n_regionkey@2, l_extendedprice@3, l_discount@4] | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={1 group: []}, projection=[n_nationkey, n_name, n_regionkey], file_type=vortex, predicate: DynamicFilter [ empty ] | +| | ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@0 as s_nationkey] | +| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(s_suppkey@0, l_suppkey@1), (s_nationkey@1, c_nationkey@0)], projection=[s_nationkey@1, l_extendedprice@4, l_discount@5] | +| | RepartitionExec: partitioning=Hash([s_suppkey@0, s_nationkey@1], 16), input_partitions=16 | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[s_suppkey, s_nationkey], file_type=vortex, predicate: DynamicFilter [ empty ] | +| | RepartitionExec: partitioning=Hash([l_suppkey@1, c_nationkey@0], 16), input_partitions=16 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(o_orderkey@1, l_orderkey@0)], projection=[c_nationkey@0, l_suppkey@3, l_extendedprice@4, l_discount@5] | +| | RepartitionExec: partitioning=Hash([o_orderkey@1], 16), input_partitions=16 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(c_custkey@0, o_custkey@1)], projection=[c_nationkey@1, o_orderkey@2] | +| | RepartitionExec: partitioning=Hash([c_custkey@0], 16), input_partitions=16 | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[c_custkey, c_nationkey], file_type=vortex | +| | RepartitionExec: partitioning=Hash([o_custkey@1], 16), input_partitions=16 | +| | ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[o_orderkey, o_custkey, o_orderdate], file_type=vortex, predicate: o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01 AND DynamicFilter [ empty ] | +| | RepartitionExec: partitioning=Hash([l_orderkey@0], 16), input_partitions=16 | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount], file_type=vortex, predicate: DynamicFilter [ empty ] | +| | | ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q7_explain_sf100.snap b/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q7_explain_sf100.snap index d433195146..1cbc63934b 100644 --- a/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q7_explain_sf100.snap +++ b/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q7_explain_sf100.snap @@ -2,104 +2,78 @@ source: crates/test-framework/src/snapshot/mod.rs description: "Query: tpch_q7" --- -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: shipping.supp_nation ASC NULLS LAST, shipping.cust_nation ASC NULLS LAST, shipping.l_year ASC NULLS LAST | -| | Projection: shipping.supp_nation, shipping.cust_nation, shipping.l_year, sum(shipping.volume) AS revenue | -| | Aggregate: groupBy=[[shipping.supp_nation, shipping.cust_nation, shipping.l_year]], aggr=[[sum(shipping.volume)]] | -| | SubqueryAlias: shipping | -| | Projection: n1.n_name AS supp_nation, n2.n_name AS cust_nation, date_part(Utf8("YEAR"), lineitem.l_shipdate) AS l_year, lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) AS volume | -| | Inner Join: customer.c_nationkey = n2.n_nationkey Filter: n1.n_name = LargeUtf8("FRANCE") AND n2.n_name = LargeUtf8("GERMANY") OR n1.n_name = LargeUtf8("GERMANY") AND n2.n_name = LargeUtf8("FRANCE") | -| | LeftSemi Join: customer.c_nationkey = __correlated_sq_3.n_nationkey | -| | Projection: lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate, customer.c_nationkey, n1.n_name | -| | Inner Join: supplier.s_nationkey = n1.n_nationkey | -| | LeftSemi Join: supplier.s_nationkey = __correlated_sq_4.n_nationkey | -| | Projection: supplier.s_nationkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate, customer.c_nationkey | -| | Inner Join: orders.o_custkey = customer.c_custkey | -| | Projection: supplier.s_nationkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate, orders.o_custkey | -| | Inner Join: lineitem.l_orderkey = orders.o_orderkey | -| | Projection: supplier.s_nationkey, lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate | -| | Inner Join: supplier.s_suppkey = lineitem.l_suppkey | -| | TableScan: supplier projection=[s_suppkey, s_nationkey] | -| | Filter: lineitem.l_shipdate >= Date32("1995-01-01") AND lineitem.l_shipdate <= Date32("1996-12-31") | -| | TableScan: lineitem projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("1995-01-01"), lineitem.l_shipdate <= Date32("1996-12-31")] | -| | TableScan: orders projection=[o_orderkey, o_custkey] | -| | TableScan: customer projection=[c_custkey, c_nationkey] | -| | SubqueryAlias: __correlated_sq_4 | -| | SubqueryAlias: __cayenne_xclos___2 | -| | SubqueryAlias: n1 | -| | Projection: nation.n_nationkey | -| | Filter: nation.n_name = LargeUtf8("FRANCE") OR nation.n_name = LargeUtf8("GERMANY") | -| | TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = LargeUtf8("FRANCE") OR nation.n_name = LargeUtf8("GERMANY")] | -| | SubqueryAlias: n1 | -| | Filter: nation.n_name = LargeUtf8("FRANCE") OR nation.n_name = LargeUtf8("GERMANY") | -| | TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = LargeUtf8("FRANCE") OR nation.n_name = LargeUtf8("GERMANY")] | -| | SubqueryAlias: __correlated_sq_3 | -| | SubqueryAlias: __cayenne_xclos___1 | -| | SubqueryAlias: n2 | -| | Projection: nation.n_nationkey | -| | Filter: nation.n_name = LargeUtf8("GERMANY") OR nation.n_name = LargeUtf8("FRANCE") | -| | TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = LargeUtf8("GERMANY") OR nation.n_name = LargeUtf8("FRANCE")] | -| | SubqueryAlias: n2 | -| | Filter: nation.n_name = LargeUtf8("GERMANY") OR nation.n_name = LargeUtf8("FRANCE") | -| | TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = LargeUtf8("GERMANY") OR nation.n_name = LargeUtf8("FRANCE")] | -| physical_plan | SortPreservingMergeExec: [supp_nation@0 ASC NULLS LAST, cust_nation@1 ASC NULLS LAST, l_year@2 ASC NULLS LAST] | -| | SortExec: expr=[supp_nation@0 ASC NULLS LAST, cust_nation@1 ASC NULLS LAST, l_year@2 ASC NULLS LAST], preserve_partitioning=[true] | -| | ProjectionExec: expr=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year, sum(shipping.volume)@3 as revenue] | -| | AggregateExec: mode=FinalPartitioned, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[sum(shipping.volume)] | -| | RepartitionExec: partitioning=Hash([supp_nation@0, cust_nation@1, l_year@2], 16), input_partitions=16 | -| | AggregateExec: mode=Partial, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[sum(shipping.volume)] | -| | ProjectionExec: expr=[n_name@4 as supp_nation, n_name@0 as cust_nation, date_part(YEAR, l_shipdate@3) as l_year, l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as volume] | -| | HashJoinExec: mode=CollectLeft, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(n_nationkey@0, c_nationkey@3)], filter=n_name@0 = FRANCE AND n_name@1 = GERMANY OR n_name@0 = GERMANY AND n_name@1 = FRANCE, projection=[n_name@1, l_extendedprice@2, l_discount@3, l_shipdate@4, n_name@6] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={1 group: []}, projection=[n_nationkey, n_name], file_type=vortex, predicate: n_name@1 = GERMANY OR n_name@1 = FRANCE | -| | HashJoinExec: mode=CollectLeft, join_type=RightSemi, accumulator=MinMaxLeftAccumulator, on=[(n_nationkey@0, c_nationkey@3)] | -| | ProjectionExec: expr=[n_nationkey@0 as n_nationkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={1 group: []}, projection=[n_nationkey, n_name], file_type=vortex, predicate: n_name@1 = GERMANY OR n_name@1 = FRANCE | -| | ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_shipdate@3 as l_shipdate, c_nationkey@4 as c_nationkey, n_name@0 as n_name] | -| | HashJoinExec: mode=CollectLeft, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(n_nationkey@0, s_nationkey@0)], projection=[n_name@1, l_extendedprice@3, l_discount@4, l_shipdate@5, c_nationkey@6] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={1 group: []}, projection=[n_nationkey, n_name], file_type=vortex, predicate: n_name@1 = FRANCE OR n_name@1 = GERMANY | -| | HashJoinExec: mode=CollectLeft, join_type=RightSemi, accumulator=MinMaxLeftAccumulator, on=[(n_nationkey@0, s_nationkey@0)] | -| | ProjectionExec: expr=[n_nationkey@0 as n_nationkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={1 group: []}, projection=[n_nationkey, n_name], file_type=vortex, predicate: n_name@1 = FRANCE OR n_name@1 = GERMANY | -| | ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, c_nationkey@0 as c_nationkey] | -| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(c_custkey@0, o_custkey@4)], projection=[c_nationkey@1, s_nationkey@2, l_extendedprice@3, l_discount@4, l_shipdate@5] | -| | RepartitionExec: partitioning=Hash([c_custkey@0], 16), input_partitions=16 | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[c_custkey, c_nationkey], file_type=vortex | -| | RepartitionExec: partitioning=Hash([o_custkey@4], 16), input_partitions=16 | -| | ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, o_custkey@0 as o_custkey] | -| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(o_orderkey@0, l_orderkey@1)], projection=[o_custkey@1, s_nationkey@2, l_extendedprice@4, l_discount@5, l_shipdate@6] | -| | RepartitionExec: partitioning=Hash([o_orderkey@0], 16), input_partitions=16 | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[o_orderkey, o_custkey], file_type=vortex, predicate: DynamicFilter [ empty ] | -| | RepartitionExec: partitioning=Hash([l_orderkey@1], 16), input_partitions=16 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5, l_shipdate@6] | -| | RepartitionExec: partitioning=Hash([s_suppkey@0], 16), input_partitions=16 | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[s_suppkey, s_nationkey], file_type=vortex | -| | RepartitionExec: partitioning=Hash([l_suppkey@1], 16), input_partitions=16 | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], file_type=vortex, predicate: l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31 AND DynamicFilter [ empty ] AND DynamicFilter [ empty ] | -| | | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: shipping.supp_nation ASC NULLS LAST, shipping.cust_nation ASC NULLS LAST, shipping.l_year ASC NULLS LAST | +| | Projection: shipping.supp_nation, shipping.cust_nation, shipping.l_year, sum(shipping.volume) AS revenue | +| | Aggregate: groupBy=[[shipping.supp_nation, shipping.cust_nation, shipping.l_year]], aggr=[[sum(shipping.volume)]] | +| | SubqueryAlias: shipping | +| | Projection: n1.n_name AS supp_nation, n2.n_name AS cust_nation, date_part(Utf8("YEAR"), lineitem.l_shipdate) AS l_year, lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) AS volume | +| | Inner Join: customer.c_nationkey = n2.n_nationkey Filter: n1.n_name = LargeUtf8("FRANCE") AND n2.n_name = LargeUtf8("GERMANY") OR n1.n_name = LargeUtf8("GERMANY") AND n2.n_name = LargeUtf8("FRANCE") | +| | Projection: lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate, customer.c_nationkey, n1.n_name | +| | Inner Join: supplier.s_nationkey = n1.n_nationkey | +| | Projection: supplier.s_nationkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate, customer.c_nationkey | +| | Inner Join: orders.o_custkey = customer.c_custkey | +| | Projection: supplier.s_nationkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate, orders.o_custkey | +| | Inner Join: lineitem.l_orderkey = orders.o_orderkey | +| | Projection: supplier.s_nationkey, lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate | +| | Inner Join: supplier.s_suppkey = lineitem.l_suppkey | +| | TableScan: supplier projection=[s_suppkey, s_nationkey] | +| | Filter: lineitem.l_shipdate >= Date32("1995-01-01") AND lineitem.l_shipdate <= Date32("1996-12-31") | +| | TableScan: lineitem projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("1995-01-01"), lineitem.l_shipdate <= Date32("1996-12-31")] | +| | TableScan: orders projection=[o_orderkey, o_custkey] | +| | TableScan: customer projection=[c_custkey, c_nationkey] | +| | SubqueryAlias: n1 | +| | Filter: nation.n_name = LargeUtf8("FRANCE") OR nation.n_name = LargeUtf8("GERMANY") | +| | TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = LargeUtf8("FRANCE") OR nation.n_name = LargeUtf8("GERMANY")] | +| | SubqueryAlias: n2 | +| | Filter: nation.n_name = LargeUtf8("GERMANY") OR nation.n_name = LargeUtf8("FRANCE") | +| | TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = LargeUtf8("GERMANY") OR nation.n_name = LargeUtf8("FRANCE")] | +| physical_plan | SortPreservingMergeExec: [supp_nation@0 ASC NULLS LAST, cust_nation@1 ASC NULLS LAST, l_year@2 ASC NULLS LAST] | +| | SortExec: expr=[supp_nation@0 ASC NULLS LAST, cust_nation@1 ASC NULLS LAST, l_year@2 ASC NULLS LAST], preserve_partitioning=[true] | +| | ProjectionExec: expr=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year, sum(shipping.volume)@3 as revenue] | +| | AggregateExec: mode=FinalPartitioned, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[sum(shipping.volume)] | +| | RepartitionExec: partitioning=Hash([supp_nation@0, cust_nation@1, l_year@2], 16), input_partitions=16 | +| | AggregateExec: mode=Partial, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[sum(shipping.volume)] | +| | ProjectionExec: expr=[n_name@4 as supp_nation, n_name@0 as cust_nation, date_part(YEAR, l_shipdate@3) as l_year, l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as volume] | +| | HashJoinExec: mode=CollectLeft, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(n_nationkey@0, c_nationkey@3)], filter=n_name@0 = FRANCE AND n_name@1 = GERMANY OR n_name@0 = GERMANY AND n_name@1 = FRANCE, projection=[n_name@1, l_extendedprice@2, l_discount@3, l_shipdate@4, n_name@6] | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={1 group: []}, projection=[n_nationkey, n_name], file_type=vortex, predicate: n_name@1 = GERMANY OR n_name@1 = FRANCE | +| | ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_shipdate@3 as l_shipdate, c_nationkey@4 as c_nationkey, n_name@0 as n_name] | +| | HashJoinExec: mode=CollectLeft, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(n_nationkey@0, s_nationkey@0)], projection=[n_name@1, l_extendedprice@3, l_discount@4, l_shipdate@5, c_nationkey@6] | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={1 group: []}, projection=[n_nationkey, n_name], file_type=vortex, predicate: n_name@1 = FRANCE OR n_name@1 = GERMANY | +| | ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, c_nationkey@0 as c_nationkey] | +| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(c_custkey@0, o_custkey@4)], projection=[c_nationkey@1, s_nationkey@2, l_extendedprice@3, l_discount@4, l_shipdate@5] | +| | RepartitionExec: partitioning=Hash([c_custkey@0], 16), input_partitions=16 | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[c_custkey, c_nationkey], file_type=vortex, predicate: DynamicFilter [ empty ] | +| | RepartitionExec: partitioning=Hash([o_custkey@4], 16), input_partitions=16 | +| | ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, o_custkey@0 as o_custkey] | +| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(o_orderkey@0, l_orderkey@1)], projection=[o_custkey@1, s_nationkey@2, l_extendedprice@4, l_discount@5, l_shipdate@6] | +| | RepartitionExec: partitioning=Hash([o_orderkey@0], 16), input_partitions=16 | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[o_orderkey, o_custkey], file_type=vortex, predicate: DynamicFilter [ empty ] | +| | RepartitionExec: partitioning=Hash([l_orderkey@1], 16), input_partitions=16 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5, l_shipdate@6] | +| | RepartitionExec: partitioning=Hash([s_suppkey@0], 16), input_partitions=16 | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[s_suppkey, s_nationkey], file_type=vortex, predicate: DynamicFilter [ empty ] | +| | RepartitionExec: partitioning=Hash([l_suppkey@1], 16), input_partitions=16 | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], file_type=vortex, predicate: l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31 AND DynamicFilter [ empty ] AND DynamicFilter [ empty ] | +| | | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q8_explain_sf100.snap b/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q8_explain_sf100.snap index 32e3d605ac..3e35ea85b5 100644 --- a/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q8_explain_sf100.snap +++ b/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q8_explain_sf100.snap @@ -2,127 +2,102 @@ source: crates/test-framework/src/snapshot/mod.rs description: "Query: tpch_q8" --- -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: all_nations.o_year ASC NULLS LAST | -| | Projection: all_nations.o_year, sum(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END) / sum(all_nations.volume) AS mkt_share | -| | Aggregate: groupBy=[[all_nations.o_year]], aggr=[[sum(CASE WHEN all_nations.nation = LargeUtf8("BRAZIL") THEN all_nations.volume ELSE Decimal128(Some(0),38,4) END) AS sum(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)]] | -| | SubqueryAlias: all_nations | -| | Projection: date_part(Utf8("YEAR"), orders.o_orderdate) AS o_year, lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) AS volume, n2.n_name AS nation | -| | Inner Join: n1.n_regionkey = region.r_regionkey | -| | LeftSemi Join: n1.n_regionkey = __correlated_sq_3.r_regionkey | -| | Projection: lineitem.l_extendedprice, lineitem.l_discount, orders.o_orderdate, n1.n_regionkey, n2.n_name | -| | Inner Join: supplier.s_nationkey = n2.n_nationkey | -| | Projection: lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, orders.o_orderdate, n1.n_regionkey | -| | Inner Join: customer.c_nationkey = n1.n_nationkey | -| | Projection: lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, orders.o_orderdate, customer.c_nationkey | -| | Inner Join: orders.o_custkey = customer.c_custkey | -| | Projection: lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, orders.o_custkey, orders.o_orderdate | -| | Inner Join: lineitem.l_orderkey = orders.o_orderkey | -| | Projection: lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey | -| | Inner Join: lineitem.l_suppkey = supplier.s_suppkey | -| | Projection: lineitem.l_orderkey, lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount | -| | Inner Join: part.p_partkey = lineitem.l_partkey | -| | Projection: part.p_partkey | -| | Filter: part.p_type = LargeUtf8("ECONOMY ANODIZED STEEL") | -| | TableScan: part projection=[p_partkey, p_type], partial_filters=[part.p_type = LargeUtf8("ECONOMY ANODIZED STEEL")] | -| | LeftSemi Join: lineitem.l_partkey = __correlated_sq_4.p_partkey | -| | TableScan: lineitem projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] | -| | SubqueryAlias: __correlated_sq_4 | -| | SubqueryAlias: __cayenne_xclos___2 | -| | Projection: part.p_partkey | -| | Filter: part.p_type = LargeUtf8("ECONOMY ANODIZED STEEL") | -| | TableScan: part projection=[p_partkey, p_type], partial_filters=[part.p_type = LargeUtf8("ECONOMY ANODIZED STEEL")] | -| | TableScan: supplier projection=[s_suppkey, s_nationkey] | -| | Filter: orders.o_orderdate >= Date32("1995-01-01") AND orders.o_orderdate <= Date32("1996-12-31") | -| | TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate], partial_filters=[orders.o_orderdate >= Date32("1995-01-01"), orders.o_orderdate <= Date32("1996-12-31")] | -| | TableScan: customer projection=[c_custkey, c_nationkey] | -| | SubqueryAlias: n1 | -| | TableScan: nation projection=[n_nationkey, n_regionkey] | -| | SubqueryAlias: n2 | -| | TableScan: nation projection=[n_nationkey, n_name] | -| | SubqueryAlias: __correlated_sq_3 | -| | SubqueryAlias: __cayenne_xclos___1 | -| | Projection: region.r_regionkey | -| | Filter: region.r_name = LargeUtf8("AMERICA") | -| | TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = LargeUtf8("AMERICA")] | -| | Projection: region.r_regionkey | -| | Filter: region.r_name = LargeUtf8("AMERICA") | -| | TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = LargeUtf8("AMERICA")] | -| physical_plan | SortPreservingMergeExec: [o_year@0 ASC NULLS LAST] | -| | SortExec: expr=[o_year@0 ASC NULLS LAST], preserve_partitioning=[true] | -| | ProjectionExec: expr=[o_year@0 as o_year, sum(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END)@1 / sum(all_nations.volume)@2 as mkt_share] | -| | AggregateExec: mode=FinalPartitioned, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] | -| | RepartitionExec: partitioning=Hash([o_year@0], 16), input_partitions=16 | -| | AggregateExec: mode=Partial, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] | -| | ProjectionExec: expr=[date_part(YEAR, o_orderdate@2) as o_year, l_extendedprice@0 * (Some(1),20,0 - l_discount@1) as volume, n_name@3 as nation] | -| | HashJoinExec: mode=CollectLeft, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, o_orderdate@3, n_name@5] | -| | ProjectionExec: expr=[r_regionkey@0 as r_regionkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={1 group: []}, projection=[r_regionkey, r_name], file_type=vortex, predicate: r_name@1 = AMERICA | -| | HashJoinExec: mode=CollectLeft, join_type=RightSemi, accumulator=MinMaxLeftAccumulator, on=[(r_regionkey@0, n_regionkey@3)] | -| | ProjectionExec: expr=[r_regionkey@0 as r_regionkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={1 group: []}, projection=[r_regionkey, r_name], file_type=vortex, predicate: r_name@1 = AMERICA | -| | ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, o_orderdate@3 as o_orderdate, n_regionkey@4 as n_regionkey, n_name@0 as n_name] | -| | HashJoinExec: mode=CollectLeft, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, l_extendedprice@2, l_discount@3, o_orderdate@5, n_regionkey@6] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={1 group: []}, projection=[n_nationkey, n_name], file_type=vortex | -| | ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, n_regionkey@0 as n_regionkey] | -| | HashJoinExec: mode=CollectLeft, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(n_nationkey@0, c_nationkey@4)], projection=[n_regionkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@5] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={1 group: []}, projection=[n_nationkey, n_regionkey], file_type=vortex | -| | ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, c_nationkey@0 as c_nationkey] | -| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(c_custkey@0, o_custkey@3)], projection=[c_nationkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@6] | -| | RepartitionExec: partitioning=Hash([c_custkey@0], 16), input_partitions=16 | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[c_custkey, c_nationkey], file_type=vortex, predicate: DynamicFilter [ empty ] | -| | RepartitionExec: partitioning=Hash([o_custkey@3], 16), input_partitions=16 | -| | ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, o_custkey@0 as o_custkey, o_orderdate@1 as o_orderdate] | -| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_custkey@1, o_orderdate@2, l_extendedprice@4, l_discount@5, s_nationkey@6] | -| | RepartitionExec: partitioning=Hash([o_orderkey@0], 16), input_partitions=16 | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[o_orderkey, o_custkey, o_orderdate], file_type=vortex, predicate: o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31 AND DynamicFilter [ empty ] | -| | RepartitionExec: partitioning=Hash([l_orderkey@0], 16), input_partitions=16 | -| | ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@0 as s_nationkey] | -| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5] | -| | RepartitionExec: partitioning=Hash([s_suppkey@0], 16), input_partitions=16 | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[s_suppkey, s_nationkey], file_type=vortex, predicate: DynamicFilter [ empty ] | -| | RepartitionExec: partitioning=Hash([l_suppkey@1], 16), input_partitions=16 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_suppkey@3, l_extendedprice@4, l_discount@5] | -| | RepartitionExec: partitioning=Hash([p_partkey@0], 16), input_partitions=16 | -| | ProjectionExec: expr=[p_partkey@0 as p_partkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[p_partkey, p_type], file_type=vortex, predicate: p_type@4 = ECONOMY ANODIZED STEEL | -| | HashJoinExec: mode=Partitioned, join_type=RightSemi, accumulator=MinMaxLeftAccumulator, on=[(p_partkey@0, l_partkey@1)] | -| | RepartitionExec: partitioning=Hash([p_partkey@0], 16), input_partitions=16 | -| | ProjectionExec: expr=[p_partkey@0 as p_partkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[p_partkey, p_type], file_type=vortex, predicate: p_type@4 = ECONOMY ANODIZED STEEL | -| | RepartitionExec: partitioning=Hash([l_partkey@1], 16), input_partitions=16 | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount], file_type=vortex | -| | | -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: all_nations.o_year ASC NULLS LAST | +| | Projection: all_nations.o_year, sum(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END) / sum(all_nations.volume) AS mkt_share | +| | Aggregate: groupBy=[[all_nations.o_year]], aggr=[[sum(CASE WHEN all_nations.nation = LargeUtf8("BRAZIL") THEN all_nations.volume ELSE Decimal128(Some(0),38,4) END) AS sum(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)]] | +| | SubqueryAlias: all_nations | +| | Projection: date_part(Utf8("YEAR"), orders.o_orderdate) AS o_year, lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) AS volume, n2.n_name AS nation | +| | Inner Join: n1.n_regionkey = region.r_regionkey | +| | Projection: lineitem.l_extendedprice, lineitem.l_discount, orders.o_orderdate, n1.n_regionkey, n2.n_name | +| | Inner Join: supplier.s_nationkey = n2.n_nationkey | +| | Projection: lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, orders.o_orderdate, n1.n_regionkey | +| | Inner Join: customer.c_nationkey = n1.n_nationkey | +| | Projection: lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, orders.o_orderdate, customer.c_nationkey | +| | Inner Join: orders.o_custkey = customer.c_custkey | +| | Projection: lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, orders.o_custkey, orders.o_orderdate | +| | Inner Join: lineitem.l_orderkey = orders.o_orderkey | +| | Projection: lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey | +| | Inner Join: lineitem.l_suppkey = supplier.s_suppkey | +| | Projection: lineitem.l_orderkey, lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount | +| | Inner Join: part.p_partkey = lineitem.l_partkey | +| | Projection: part.p_partkey | +| | Filter: part.p_type = LargeUtf8("ECONOMY ANODIZED STEEL") | +| | TableScan: part projection=[p_partkey, p_type], partial_filters=[part.p_type = LargeUtf8("ECONOMY ANODIZED STEEL")] | +| | TableScan: lineitem projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] | +| | TableScan: supplier projection=[s_suppkey, s_nationkey] | +| | Filter: orders.o_orderdate >= Date32("1995-01-01") AND orders.o_orderdate <= Date32("1996-12-31") | +| | TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate], partial_filters=[orders.o_orderdate >= Date32("1995-01-01"), orders.o_orderdate <= Date32("1996-12-31")] | +| | TableScan: customer projection=[c_custkey, c_nationkey] | +| | SubqueryAlias: n1 | +| | TableScan: nation projection=[n_nationkey, n_regionkey] | +| | SubqueryAlias: n2 | +| | TableScan: nation projection=[n_nationkey, n_name] | +| | Projection: region.r_regionkey | +| | Filter: region.r_name = LargeUtf8("AMERICA") | +| | TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = LargeUtf8("AMERICA")] | +| physical_plan | SortPreservingMergeExec: [o_year@0 ASC NULLS LAST] | +| | SortExec: expr=[o_year@0 ASC NULLS LAST], preserve_partitioning=[true] | +| | ProjectionExec: expr=[o_year@0 as o_year, sum(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END)@1 / sum(all_nations.volume)@2 as mkt_share] | +| | AggregateExec: mode=FinalPartitioned, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] | +| | RepartitionExec: partitioning=Hash([o_year@0], 16), input_partitions=16 | +| | AggregateExec: mode=Partial, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] | +| | ProjectionExec: expr=[date_part(YEAR, o_orderdate@2) as o_year, l_extendedprice@0 * (Some(1),20,0 - l_discount@1) as volume, n_name@3 as nation] | +| | HashJoinExec: mode=CollectLeft, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, o_orderdate@3, n_name@5] | +| | ProjectionExec: expr=[r_regionkey@0 as r_regionkey] | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={1 group: []}, projection=[r_regionkey, r_name], file_type=vortex, predicate: r_name@1 = AMERICA | +| | ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, o_orderdate@3 as o_orderdate, n_regionkey@4 as n_regionkey, n_name@0 as n_name] | +| | HashJoinExec: mode=CollectLeft, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, l_extendedprice@2, l_discount@3, o_orderdate@5, n_regionkey@6] | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={1 group: []}, projection=[n_nationkey, n_name], file_type=vortex | +| | ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, n_regionkey@0 as n_regionkey] | +| | HashJoinExec: mode=CollectLeft, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(n_nationkey@0, c_nationkey@4)], projection=[n_regionkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@5] | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={1 group: []}, projection=[n_nationkey, n_regionkey], file_type=vortex, predicate: DynamicFilter [ empty ] | +| | ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, c_nationkey@0 as c_nationkey] | +| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(c_custkey@0, o_custkey@3)], projection=[c_nationkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@6] | +| | RepartitionExec: partitioning=Hash([c_custkey@0], 16), input_partitions=16 | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[c_custkey, c_nationkey], file_type=vortex, predicate: DynamicFilter [ empty ] | +| | RepartitionExec: partitioning=Hash([o_custkey@3], 16), input_partitions=16 | +| | ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, o_custkey@0 as o_custkey, o_orderdate@1 as o_orderdate] | +| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_custkey@1, o_orderdate@2, l_extendedprice@4, l_discount@5, s_nationkey@6] | +| | RepartitionExec: partitioning=Hash([o_orderkey@0], 16), input_partitions=16 | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[o_orderkey, o_custkey, o_orderdate], file_type=vortex, predicate: o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31 AND DynamicFilter [ empty ] | +| | RepartitionExec: partitioning=Hash([l_orderkey@0], 16), input_partitions=16 | +| | ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@0 as s_nationkey] | +| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5] | +| | RepartitionExec: partitioning=Hash([s_suppkey@0], 16), input_partitions=16 | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[s_suppkey, s_nationkey], file_type=vortex, predicate: DynamicFilter [ empty ] | +| | RepartitionExec: partitioning=Hash([l_suppkey@1], 16), input_partitions=16 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_suppkey@3, l_extendedprice@4, l_discount@5] | +| | RepartitionExec: partitioning=Hash([p_partkey@0], 16), input_partitions=16 | +| | ProjectionExec: expr=[p_partkey@0 as p_partkey] | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[p_partkey, p_type], file_type=vortex, predicate: p_type@4 = ECONOMY ANODIZED STEEL | +| | RepartitionExec: partitioning=Hash([l_partkey@1], 16), input_partitions=16 | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount], file_type=vortex, predicate: DynamicFilter [ empty ] AND DynamicFilter [ empty ] AND DynamicFilter [ empty ] | +| | | ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q9_explain_sf100.snap b/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q9_explain_sf100.snap index a4df536d53..d277178a8b 100644 --- a/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q9_explain_sf100.snap +++ b/crates/test-framework/src/snapshot/snapshots/explain/test_framework__snapshot__s3[parquet]-cayenne[file]_tpch_q9_explain_sf100.snap @@ -2,91 +2,78 @@ source: crates/test-framework/src/snapshot/mod.rs description: "Query: tpch_q9" --- -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: profit.nation ASC NULLS LAST, profit.o_year DESC NULLS FIRST | -| | Projection: profit.nation, profit.o_year, sum(profit.amount) AS sum_profit | -| | Aggregate: groupBy=[[profit.nation, profit.o_year]], aggr=[[sum(profit.amount)]] | -| | SubqueryAlias: profit | -| | Projection: nation.n_name AS nation, date_part(Utf8("YEAR"), orders.o_orderdate) AS o_year, lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) - partsupp.ps_supplycost * lineitem.l_quantity AS amount | -| | Inner Join: supplier.s_nationkey = nation.n_nationkey | -| | Projection: lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, partsupp.ps_supplycost, orders.o_orderdate | -| | Inner Join: lineitem.l_orderkey = orders.o_orderkey | -| | Projection: lineitem.l_orderkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, partsupp.ps_supplycost | -| | Inner Join: lineitem.l_suppkey = partsupp.ps_suppkey, lineitem.l_partkey = partsupp.ps_partkey | -| | Projection: lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey | -| | Inner Join: lineitem.l_suppkey = supplier.s_suppkey | -| | Projection: lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount | -| | Inner Join: part.p_partkey = lineitem.l_partkey | -| | Projection: part.p_partkey | -| | Filter: part.p_name LIKE LargeUtf8("%green%") | -| | TableScan: part projection=[p_partkey, p_name], partial_filters=[part.p_name LIKE LargeUtf8("%green%")] | -| | LeftSemi Join: lineitem.l_partkey = __correlated_sq_2.p_partkey | -| | TableScan: lineitem projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] | -| | SubqueryAlias: __correlated_sq_2 | -| | SubqueryAlias: __cayenne_xclos___1 | -| | Projection: part.p_partkey | -| | Filter: part.p_name LIKE LargeUtf8("%green%") | -| | TableScan: part projection=[p_partkey, p_name], partial_filters=[part.p_name LIKE LargeUtf8("%green%")] | -| | TableScan: supplier projection=[s_suppkey, s_nationkey] | -| | TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost] | -| | TableScan: orders projection=[o_orderkey, o_orderdate] | -| | TableScan: nation projection=[n_nationkey, n_name] | -| physical_plan | SortPreservingMergeExec: [nation@0 ASC NULLS LAST, o_year@1 DESC] | -| | SortExec: expr=[nation@0 ASC NULLS LAST, o_year@1 DESC], preserve_partitioning=[true] | -| | ProjectionExec: expr=[nation@0 as nation, o_year@1 as o_year, sum(profit.amount)@2 as sum_profit] | -| | AggregateExec: mode=FinalPartitioned, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] | -| | RepartitionExec: partitioning=Hash([nation@0, o_year@1], 16), input_partitions=16 | -| | AggregateExec: mode=Partial, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] | -| | ProjectionExec: expr=[n_name@0 as nation, date_part(YEAR, o_orderdate@5) as o_year, l_extendedprice@2 * (Some(1),20,0 - l_discount@3) - ps_supplycost@4 * l_quantity@1 as amount] | -| | HashJoinExec: mode=CollectLeft, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(n_nationkey@0, s_nationkey@3)], projection=[n_name@1, l_quantity@2, l_extendedprice@3, l_discount@4, ps_supplycost@6, o_orderdate@7] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={1 group: []}, projection=[n_nationkey, n_name], file_type=vortex | -| | ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, ps_supplycost@5 as ps_supplycost, o_orderdate@0 as o_orderdate] | -| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, l_quantity@3, l_extendedprice@4, l_discount@5, s_nationkey@6, ps_supplycost@7] | -| | RepartitionExec: partitioning=Hash([o_orderkey@0], 16), input_partitions=16 | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[o_orderkey, o_orderdate], file_type=vortex | -| | RepartitionExec: partitioning=Hash([l_orderkey@0], 16), input_partitions=16 | -| | ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_quantity@2 as l_quantity, l_extendedprice@3 as l_extendedprice, l_discount@4 as l_discount, s_nationkey@5 as s_nationkey, ps_supplycost@0 as ps_supplycost] | -| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(ps_suppkey@1, l_suppkey@2), (ps_partkey@0, l_partkey@1)], projection=[ps_supplycost@2, l_orderkey@3, l_quantity@6, l_extendedprice@7, l_discount@8, s_nationkey@9] | -| | RepartitionExec: partitioning=Hash([ps_suppkey@1, ps_partkey@0], 16), input_partitions=16 | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[ps_partkey, ps_suppkey, ps_supplycost], file_type=vortex | -| | RepartitionExec: partitioning=Hash([l_suppkey@2, l_partkey@1], 16), input_partitions=16 | -| | ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_partkey@2 as l_partkey, l_suppkey@3 as l_suppkey, l_quantity@4 as l_quantity, l_extendedprice@5 as l_extendedprice, l_discount@6 as l_discount, s_nationkey@0 as s_nationkey] | -| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(s_suppkey@0, l_suppkey@2)], projection=[s_nationkey@1, l_orderkey@2, l_partkey@3, l_suppkey@4, l_quantity@5, l_extendedprice@6, l_discount@7] | -| | RepartitionExec: partitioning=Hash([s_suppkey@0], 16), input_partitions=16 | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[s_suppkey, s_nationkey], file_type=vortex, predicate: DynamicFilter [ empty ] | -| | RepartitionExec: partitioning=Hash([l_suppkey@2], 16), input_partitions=16 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_partkey@2, l_suppkey@3, l_quantity@4, l_extendedprice@5, l_discount@6] | -| | RepartitionExec: partitioning=Hash([p_partkey@0], 16), input_partitions=16 | -| | ProjectionExec: expr=[p_partkey@0 as p_partkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[p_partkey, p_name], file_type=vortex, predicate: p_name@1 LIKE %green% | -| | HashJoinExec: mode=Partitioned, join_type=RightSemi, accumulator=MinMaxLeftAccumulator, on=[(p_partkey@0, l_partkey@1)] | -| | RepartitionExec: partitioning=Hash([p_partkey@0], 16), input_partitions=16 | -| | ProjectionExec: expr=[p_partkey@0 as p_partkey] | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[p_partkey, p_name], file_type=vortex, predicate: p_name@1 LIKE %green% | -| | RepartitionExec: partitioning=Hash([l_partkey@1], 16), input_partitions=16 | -| | SchemaCastScanExec | -| | CayenneAccelerationExec | -| | BytesProcessedExec | -| | DataSourceExec: file_groups={16 groups: []}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount], file_type=vortex | -| | | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: profit.nation ASC NULLS LAST, profit.o_year DESC NULLS FIRST | +| | Projection: profit.nation, profit.o_year, sum(profit.amount) AS sum_profit | +| | Aggregate: groupBy=[[profit.nation, profit.o_year]], aggr=[[sum(profit.amount)]] | +| | SubqueryAlias: profit | +| | Projection: nation.n_name AS nation, date_part(Utf8("YEAR"), orders.o_orderdate) AS o_year, lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) - partsupp.ps_supplycost * lineitem.l_quantity AS amount | +| | Inner Join: supplier.s_nationkey = nation.n_nationkey | +| | Projection: lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, partsupp.ps_supplycost, orders.o_orderdate | +| | Inner Join: lineitem.l_orderkey = orders.o_orderkey | +| | Projection: lineitem.l_orderkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, partsupp.ps_supplycost | +| | Inner Join: lineitem.l_suppkey = partsupp.ps_suppkey, lineitem.l_partkey = partsupp.ps_partkey | +| | Projection: lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey | +| | Inner Join: lineitem.l_suppkey = supplier.s_suppkey | +| | Projection: lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount | +| | Inner Join: part.p_partkey = lineitem.l_partkey | +| | Projection: part.p_partkey | +| | Filter: part.p_name LIKE LargeUtf8("%green%") | +| | TableScan: part projection=[p_partkey, p_name], partial_filters=[part.p_name LIKE LargeUtf8("%green%")] | +| | TableScan: lineitem projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] | +| | TableScan: supplier projection=[s_suppkey, s_nationkey] | +| | TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost] | +| | TableScan: orders projection=[o_orderkey, o_orderdate] | +| | TableScan: nation projection=[n_nationkey, n_name] | +| physical_plan | SortPreservingMergeExec: [nation@0 ASC NULLS LAST, o_year@1 DESC] | +| | SortExec: expr=[nation@0 ASC NULLS LAST, o_year@1 DESC], preserve_partitioning=[true] | +| | ProjectionExec: expr=[nation@0 as nation, o_year@1 as o_year, sum(profit.amount)@2 as sum_profit] | +| | AggregateExec: mode=FinalPartitioned, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] | +| | RepartitionExec: partitioning=Hash([nation@0, o_year@1], 16), input_partitions=16 | +| | AggregateExec: mode=Partial, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] | +| | ProjectionExec: expr=[n_name@0 as nation, date_part(YEAR, o_orderdate@5) as o_year, l_extendedprice@2 * (Some(1),20,0 - l_discount@3) - ps_supplycost@4 * l_quantity@1 as amount] | +| | HashJoinExec: mode=CollectLeft, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(n_nationkey@0, s_nationkey@3)], projection=[n_name@1, l_quantity@2, l_extendedprice@3, l_discount@4, ps_supplycost@6, o_orderdate@7] | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={1 group: []}, projection=[n_nationkey, n_name], file_type=vortex | +| | ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, ps_supplycost@5 as ps_supplycost, o_orderdate@0 as o_orderdate] | +| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, l_quantity@3, l_extendedprice@4, l_discount@5, s_nationkey@6, ps_supplycost@7] | +| | RepartitionExec: partitioning=Hash([o_orderkey@0], 16), input_partitions=16 | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[o_orderkey, o_orderdate], file_type=vortex | +| | RepartitionExec: partitioning=Hash([l_orderkey@0], 16), input_partitions=16 | +| | ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_quantity@2 as l_quantity, l_extendedprice@3 as l_extendedprice, l_discount@4 as l_discount, s_nationkey@5 as s_nationkey, ps_supplycost@0 as ps_supplycost] | +| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(ps_suppkey@1, l_suppkey@2), (ps_partkey@0, l_partkey@1)], projection=[ps_supplycost@2, l_orderkey@3, l_quantity@6, l_extendedprice@7, l_discount@8, s_nationkey@9] | +| | RepartitionExec: partitioning=Hash([ps_suppkey@1, ps_partkey@0], 16), input_partitions=16 | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[ps_partkey, ps_suppkey, ps_supplycost], file_type=vortex | +| | RepartitionExec: partitioning=Hash([l_suppkey@2, l_partkey@1], 16), input_partitions=16 | +| | ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_partkey@2 as l_partkey, l_suppkey@3 as l_suppkey, l_quantity@4 as l_quantity, l_extendedprice@5 as l_extendedprice, l_discount@6 as l_discount, s_nationkey@0 as s_nationkey] | +| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(s_suppkey@0, l_suppkey@2)], projection=[s_nationkey@1, l_orderkey@2, l_partkey@3, l_suppkey@4, l_quantity@5, l_extendedprice@6, l_discount@7] | +| | RepartitionExec: partitioning=Hash([s_suppkey@0], 16), input_partitions=16 | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[s_suppkey, s_nationkey], file_type=vortex, predicate: DynamicFilter [ empty ] | +| | RepartitionExec: partitioning=Hash([l_suppkey@2], 16), input_partitions=16 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, accumulator=MinMaxLeftAccumulator, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_partkey@2, l_suppkey@3, l_quantity@4, l_extendedprice@5, l_discount@6] | +| | RepartitionExec: partitioning=Hash([p_partkey@0], 16), input_partitions=16 | +| | ProjectionExec: expr=[p_partkey@0 as p_partkey] | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[p_partkey, p_name], file_type=vortex, predicate: p_name@1 LIKE %green% | +| | RepartitionExec: partitioning=Hash([l_partkey@1], 16), input_partitions=16 | +| | SchemaCastScanExec | +| | CayenneAccelerationExec | +| | BytesProcessedExec | +| | DataSourceExec: file_groups={16 groups: []}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount], file_type=vortex, predicate: DynamicFilter [ empty ] AND DynamicFilter [ empty ] AND DynamicFilter [ empty ] AND DynamicFilter [ empty ] | +| | | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/crates/test-framework/src/snapshot/snapshots/results/test_framework__spicetest__datasets__worker__s3[parquet]-cayenne[file]_tpch_q1_sf100.snap b/crates/test-framework/src/snapshot/snapshots/results/test_framework__spicetest__datasets__worker__s3[parquet]-cayenne[file]_tpch_q1_sf100.snap index 2f859b4715..70b51b6fbe 100644 --- a/crates/test-framework/src/snapshot/snapshots/results/test_framework__spicetest__datasets__worker__s3[parquet]-cayenne[file]_tpch_q1_sf100.snap +++ b/crates/test-framework/src/snapshot/snapshots/results/test_framework__spicetest__datasets__worker__s3[parquet]-cayenne[file]_tpch_q1_sf100.snap @@ -2,11 +2,11 @@ source: crates/test-framework/src/spicetest/datasets/worker.rs description: "Query: tpch_q1" --- -+--------------+--------------+---------------+-------------------+---------------------+-----------------------+--------------------+--------------------+---------------------+-------------+ -| l_returnflag | l_linestatus | sum_qty | sum_base_price | sum_disc_price | sum_charge | avg_qty | avg_price | avg_disc | count_order | -+--------------+--------------+---------------+-------------------+---------------------+-----------------------+--------------------+--------------------+---------------------+-------------+ -| A | F | 3775127758.00 | 5660776097194.45 | 5377736398183.9374 | 5592847429515.927026 | 25.499370423275426 | 38236.116984306784 | 0.05000224353247517 | 148047881 | -| N | F | 98553062.00 | 147771098385.98 | 140384965965.0348 | 145999793032.775829 | 25.501556956882876 | 38237.1993888047 | 0.04998528433812533 | 3864590 | -| N | O | 7436302976.00 | 11150725681373.59 | 10593195308234.8523 | 11016932248183.655467 | 25.500009404374193 | 38237.22764636037 | 0.04999791831849406 | 291619617 | -| R | F | 3775724970.00 | 5661603032745.34 | 5378513563915.4097 | 5593662252666.916161 | 25.50006628406532 | 38236.697258451975 | 0.05000130434119828 | 148067261 | -+--------------+--------------+---------------+-------------------+---------------------+-----------------------+--------------------+--------------------+---------------------+-------------+ ++--------------+--------------+---------------+-------------------+---------------------+-----------------------+--------------------+-------------------+----------------------+-------------+ +| l_returnflag | l_linestatus | sum_qty | sum_base_price | sum_disc_price | sum_charge | avg_qty | avg_price | avg_disc | count_order | ++--------------+--------------+---------------+-------------------+---------------------+-----------------------+--------------------+-------------------+----------------------+-------------+ +| A | F | 3775127758.00 | 5660776097194.45 | 5377736398183.9374 | 5592847429515.927026 | 25.499370423275426 | 38236.11698430637 | 0.050002243532475114 | 148047881 | +| N | F | 98553062.00 | 147771098385.98 | 140384965965.0348 | 145999793032.775829 | 25.501556956882876 | 38237.19938880473 | 0.04998528433812535 | 3864590 | +| N | O | 7436302976.00 | 11150725681373.59 | 10593195308234.8523 | 11016932248183.655467 | 25.500009404374193 | 38237.2276463607 | 0.049997918318494414 | 291619617 | +| R | F | 3775724970.00 | 5661603032745.34 | 5378513563915.4097 | 5593662252666.916161 | 25.50006628406532 | 38236.69725845202 | 0.05000130434119746 | 148067261 | ++--------------+--------------+---------------+-------------------+---------------------+-----------------------+--------------------+-------------------+----------------------+-------------+ From d328d9a4fccc28cb8182848ce5a05f69ba1836d2 Mon Sep 17 00:00:00 2001 From: Phillip LeBlanc <879445+phillipleblanc@users.noreply.github.com> Date: Wed, 20 May 2026 05:33:37 +0900 Subject: [PATCH 5/5] feat(mcp): support auth for streamable HTTP tools (#10927) Co-authored-by: Sergei Grebnov --- bin/spice/src/commands/component.rs | 10 +- crates/runtime/src/tools/mcp/catalog.rs | 20 ++- crates/runtime/src/tools/mcp/mod.rs | 161 +++++++++++++++++++++++- crates/runtime/tests/models/tools.rs | 64 ++++++++++ tools/spicepodschema/README.md | 2 +- 5 files changed, 247 insertions(+), 10 deletions(-) diff --git a/bin/spice/src/commands/component.rs b/bin/spice/src/commands/component.rs index 940d89a3fc..410607ceee 100644 --- a/bin/spice/src/commands/component.rs +++ b/bin/spice/src/commands/component.rs @@ -158,9 +158,13 @@ EXAMPLES # Add a SQL view spice view add recent_orders --sql "select * from orders limit 100" - # Add an MCP-backed tool with a secret-bound token - spice tool add lookup --from mcp:server \ - --env TOKEN='${ secrets:TOKEN }' + # Add an HTTP MCP-backed tool with a secret-bound bearer token + spice tool add lookup --from mcp:https://example.com/v1/mcp \ + --param mcp_auth_token='${ secrets:TOKEN }' + + # Or pass custom HTTP headers using the same format as HTTP datasets + spice tool add lookup --from mcp:https://example.com/v1/mcp \ + --param mcp_headers='X-API-Key: ${ secrets:API_KEY }' # Reference an external component definition file spice model add --ref models/llm.yaml diff --git a/crates/runtime/src/tools/mcp/catalog.rs b/crates/runtime/src/tools/mcp/catalog.rs index 6b17261a80..fda583891d 100644 --- a/crates/runtime/src/tools/mcp/catalog.rs +++ b/crates/runtime/src/tools/mcp/catalog.rs @@ -26,8 +26,12 @@ use rmcp::{ }, serve_client, service::{RunningService, ServiceError}, - transport::{ConfigureCommandExt, StreamableHttpClientTransport, TokioChildProcess}, + transport::{ + ConfigureCommandExt, StreamableHttpClientTransport, TokioChildProcess, + streamable_http_client::StreamableHttpClientTransportConfig, + }, }; +use secrecy::ExposeSecret; use snafu::ResultExt; use std::{ sync::{Arc, LazyLock}, @@ -189,7 +193,11 @@ impl McpToolCatalog { .context(UnderlyingTransportSnafu)?, )) } - MCPConfig::StreamableHttp { url } => { + MCPConfig::StreamableHttp { + url, + auth_token, + headers, + } => { // Security: Validate URL scheme (only https allowed, http for localhost testing) if url.scheme() != "https" && url.scheme() != "http" { return Err(Error::CouldNotConstructTool { @@ -211,7 +219,13 @@ impl McpToolCatalog { ); } - let transport = StreamableHttpClientTransport::from_uri(url.to_string()); + let mut transport_config = + StreamableHttpClientTransportConfig::with_uri(url.to_string()) + .custom_headers(headers.clone()); + if let Some(auth_token) = auth_token { + transport_config = transport_config.auth_header(auth_token.expose_secret()); + } + let transport = StreamableHttpClientTransport::from_config(transport_config); let client_info = InitializeRequestParams::new( ClientCapabilities::default(), diff --git a/crates/runtime/src/tools/mcp/mod.rs b/crates/runtime/src/tools/mcp/mod.rs index f98a4f0104..a5b6d5b1eb 100644 --- a/crates/runtime/src/tools/mcp/mod.rs +++ b/crates/runtime/src/tools/mcp/mod.rs @@ -21,6 +21,7 @@ pub mod tool; use std::{collections::HashMap, str::FromStr}; +use http::{HeaderName, HeaderValue, header::AUTHORIZATION}; use rmcp::ErrorData as McpError; use secrecy::{ExposeSecret, SecretString}; use serde::{Deserialize, Serialize}; @@ -53,6 +54,9 @@ pub enum Error { pub type Result = std::result::Result; +const MCP_AUTH_TOKEN_PARAM: &str = "mcp_auth_token"; +const MCP_HEADERS_PARAM: &str = "mcp_headers"; + #[derive(Clone, PartialEq, Serialize, Deserialize)] #[serde(untagged)] pub enum MCPType { @@ -74,7 +78,7 @@ impl FromStr for MCPType { } } -#[derive(Clone, PartialEq, Serialize, Deserialize)] +#[derive(Clone)] pub(crate) enum MCPConfig { Stdio { command: String, @@ -83,6 +87,8 @@ pub(crate) enum MCPConfig { }, StreamableHttp { url: url::Url, + auth_token: Option, + headers: HashMap, }, } impl MCPConfig { @@ -108,11 +114,66 @@ impl MCPConfig { Self::Stdio { command, args, env } } - MCPType::StreamableHttp(url) => Self::StreamableHttp { url }, + MCPType::StreamableHttp(url) => { + let auth_token = params.get(MCP_AUTH_TOKEN_PARAM).cloned(); + let mut headers = parse_custom_headers(params); + if auth_token.is_some() && headers.remove(&AUTHORIZATION).is_some() { + tracing::warn!( + "Ignoring 'authorization' header from MCP custom headers because '{MCP_AUTH_TOKEN_PARAM}' is configured" + ); + } + Self::StreamableHttp { + url, + auth_token, + headers, + } + } } } } +fn parse_custom_headers( + params: &HashMap, +) -> HashMap { + let mut custom_headers = HashMap::new(); + let Some(headers) = params.get(MCP_HEADERS_PARAM) else { + return custom_headers; + }; + let param_name = MCP_HEADERS_PARAM; + + // Same UX as the HTTP connector's `http_headers` parameter: + // `Header1: Value1, Header2: Value2` (or semicolon-delimited). + let headers_str = headers.expose_secret(); + let delimiter = if headers_str.contains(';') { ';' } else { ',' }; + for header in headers_str.split(delimiter) { + let Some((name, value)) = header.split_once(':') else { + tracing::warn!( + "Malformed MCP HTTP header in '{param_name}'. Expected format 'Name: Value'. Skipping this header." + ); + continue; + }; + + let name = name.trim(); + let value = value.trim(); + let Ok(header_name) = HeaderName::try_from(name) else { + tracing::warn!( + "Invalid MCP HTTP header name in '{param_name}': '{name}'. Skipping this header." + ); + continue; + }; + let Ok(mut header_value) = HeaderValue::from_str(value) else { + tracing::warn!( + "Invalid MCP HTTP header value for '{name}' in '{param_name}'. Skipping this header." + ); + continue; + }; + header_value.set_sensitive(true); + custom_headers.insert(header_name, header_value); + } + + custom_headers +} + #[cfg(test)] mod tests { use super::*; @@ -180,7 +241,101 @@ mod tests { let mcp_type = MCPType::StreamableHttp(url.clone()); let cfg = MCPConfig::from_type(&mcp_type, &HashMap::new(), &HashMap::new()); match cfg { - MCPConfig::StreamableHttp { url: u } => assert_eq!(u, url), + MCPConfig::StreamableHttp { + url: u, + auth_token, + headers, + } => { + assert_eq!(u, url); + assert!(auth_token.is_none()); + assert!(headers.is_empty()); + } + MCPConfig::Stdio { .. } => panic!("expected https config"), + } + } + + #[test] + fn mcp_config_from_https_collects_auth_token() { + let url = url::Url::parse("https://example.com/v1/mcp").expect("valid url"); + let mcp_type = MCPType::StreamableHttp(url); + let mut params = HashMap::new(); + params.insert( + "mcp_auth_token".to_string(), + SecretString::from("test-api-key"), + ); + + let cfg = MCPConfig::from_type(&mcp_type, ¶ms, &HashMap::new()); + match cfg { + MCPConfig::StreamableHttp { + auth_token, + headers, + .. + } => { + assert_eq!( + auth_token.as_ref().map(ExposeSecret::expose_secret), + Some("test-api-key") + ); + assert!(headers.is_empty()); + } + MCPConfig::Stdio { .. } => panic!("expected https config"), + } + } + + #[test] + fn mcp_config_from_https_collects_custom_headers() { + let url = url::Url::parse("https://example.com/v1/mcp").expect("valid url"); + let mcp_type = MCPType::StreamableHttp(url); + let mut params = HashMap::new(); + params.insert( + MCP_HEADERS_PARAM.to_string(), + SecretString::from("X-API-Key: test-api-key, X-Tenant: acme"), + ); + + let cfg = MCPConfig::from_type(&mcp_type, ¶ms, &HashMap::new()); + match cfg { + MCPConfig::StreamableHttp { headers, .. } => { + assert_eq!( + headers + .get(&HeaderName::from_static("x-api-key")) + .and_then(|value| value.to_str().ok()), + Some("test-api-key") + ); + assert_eq!( + headers + .get(&HeaderName::from_static("x-tenant")) + .and_then(|value| value.to_str().ok()), + Some("acme") + ); + } + MCPConfig::Stdio { .. } => panic!("expected https config"), + } + } + + #[test] + fn mcp_auth_token_removes_custom_authorization_header() { + let url = url::Url::parse("https://example.com/v1/mcp").expect("valid url"); + let mcp_type = MCPType::StreamableHttp(url); + let mut params = HashMap::new(); + params.insert( + MCP_AUTH_TOKEN_PARAM.to_string(), + SecretString::from("test-api-key"), + ); + params.insert( + MCP_HEADERS_PARAM.to_string(), + SecretString::from("Authorization: Basic abc, X-Tenant: acme"), + ); + + let cfg = MCPConfig::from_type(&mcp_type, ¶ms, &HashMap::new()); + match cfg { + MCPConfig::StreamableHttp { headers, .. } => { + assert!(!headers.contains_key(&AUTHORIZATION)); + assert_eq!( + headers + .get(&HeaderName::from_static("x-tenant")) + .and_then(|value| value.to_str().ok()), + Some("acme") + ); + } MCPConfig::Stdio { .. } => panic!("expected https config"), } } diff --git a/crates/runtime/tests/models/tools.rs b/crates/runtime/tests/models/tools.rs index 82bb849aee..0f56b53301 100644 --- a/crates/runtime/tests/models/tools.rs +++ b/crates/runtime/tests/models/tools.rs @@ -84,6 +84,70 @@ params: Ok(()) } + /// Test that spiced can connect to an auth-enabled Streamable HTTP MCP server using + /// `params.mcp_auth_token`, which is mounted as `Authorization: Bearer `. + #[tokio::test] + async fn test_mcp_streamable_http_with_auth_token() -> Result<(), anyhow::Error> { + let http_server_url = start_spiced_with_mcp_config(McpConfig { + allowed_hosts: Some(vec!["*".to_string()]), + }) + .await + .expect("Failed to start auth-enabled spiced MCP server"); + + let tool_yaml = format!( + "name: mcp_from_spiced\nfrom: mcp:{http_server_url}/v1/mcp\nparams:\n mcp_auth_token: {TEST_API_KEY}" + ); + let http_client_url = start_spiced_with_tools(vec![ + yaml::from_str(tool_yaml.as_str()) + .expect("Tool spicepod component is not in expected format"), + ]) + .await + .expect("Failed to start spiced with MCP tool"); + + let tools_list = call_tool_list(http_client_url.as_str()).await?; + assert!( + tools_list.iter().any(|tool| tool + .get("name") + .and_then(Value::as_str) + .is_some_and(|name| name == "mcp_from_spiced/get_readiness")), + "expected proxied MCP tools from auth-enabled Spice server: {tools_list:?}" + ); + + Ok(()) + } + + /// Test that spiced can connect to an auth-enabled Streamable HTTP MCP server using + /// custom headers in the same format as the HTTP connector's `http_headers` param. + #[tokio::test] + async fn test_mcp_streamable_http_with_custom_headers() -> Result<(), anyhow::Error> { + let http_server_url = start_spiced_with_mcp_config(McpConfig { + allowed_hosts: Some(vec!["*".to_string()]), + }) + .await + .expect("Failed to start auth-enabled spiced MCP server"); + + let tool_yaml = format!( + "name: mcp_from_spiced\nfrom: mcp:{http_server_url}/v1/mcp\nparams:\n mcp_headers: 'X-API-Key: {TEST_API_KEY}'" + ); + let http_client_url = start_spiced_with_tools(vec![ + yaml::from_str(tool_yaml.as_str()) + .expect("Tool spicepod component is not in expected format"), + ]) + .await + .expect("Failed to start spiced with MCP tool"); + + let tools_list = call_tool_list(http_client_url.as_str()).await?; + assert!( + tools_list.iter().any(|tool| tool + .get("name") + .and_then(Value::as_str) + .is_some_and(|name| name == "mcp_from_spiced/get_readiness")), + "expected proxied MCP tools from auth-enabled Spice server: {tools_list:?}" + ); + + Ok(()) + } + /// Test the MCP Streamable HTTP server endpoint directly via JSON-RPC, /// without going through the rmcp client. This verifies the wire format /// (`POST /v1/mcp` with `Accept: application/json, text/event-stream`) diff --git a/tools/spicepodschema/README.md b/tools/spicepodschema/README.md index 59ad4f91c9..3c07d59c2c 100644 --- a/tools/spicepodschema/README.md +++ b/tools/spicepodschema/README.md @@ -318,7 +318,7 @@ Parameters are currently extracted directly in `crates/runtime/src/model/embed.r Tool types: - `auto` - Builtin tools (get_readiness, list_datasets, sql, search, etc.) -- `mcp` - Model Context Protocol tools (has `mcp_args` param) +- `mcp` - Model Context Protocol tools (has `mcp_args`, `mcp_auth_token`, and `mcp_headers` params) - `memory` - Memory tools (store, load) Parameters are currently handled inline in `crates/runtime/src/tools/`.