[AQUMV] Add test cases for INSERT-SELECT queries using materialized views.

avamingli · avamingli · commit 44133378f08f · 2025-05-20T22:43:22.000+08:00
We already have the ability to use materialized views in place of
the original table in SQL statements such as:
INSERT INTO target_table SELECT ... FROM origin_table;

When valid materialized view candidates exist, the system
automatically uses them for the SELECT portion of the query, so the
data does not have to be read from the original tables and
recomputed. Leveraging the pre-computed results stored in
materialized views, rather than processing raw data each time,
provides significant performance benefits for queries involving
large datasets or frequent INSERT-SELECT operations.

This commit adds regression test cases covering this behavior.

Authored-by: Zhang Mingli <avamingli@gmail.com>
diff --git a/src/test/regress/expected/aqumv.out b/src/test/regress/expected/aqumv.out
@@ -3305,6 +3305,75 @@ select count(*) from par_1_prt_2;
  Optimizer: Postgres query optimizer
 (6 rows)
 
+abort;
+-- Test INSERT SELECT
+begin; 
+create table t_insert(a int);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+create table t_select(a int);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+insert into t_select select i from generate_series(1, 1000) i;
+analyze t_insert;
+create materialized view mv_insert_select as
+select count(a) from t_select;
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'count' as the Apache Cloudberry data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+analyze mv_insert_select;
+set local enable_answer_query_using_materialized_views = off;
+explain(costs off, verbose) insert into t_insert select count(a) from t_select;
+                                    QUERY PLAN                                     
+-----------------------------------------------------------------------------------
+ Insert on aqumv.t_insert
+   ->  Redistribute Motion 1:3  (slice1; segments: 1)
+         Output: (("*SELECT*".count)::integer)
+         Hash Key: (("*SELECT*".count)::integer)
+         ->  Subquery Scan on "*SELECT*"
+               Output: "*SELECT*".count
+               ->  Finalize Aggregate
+                     Output: count(t_select.a)
+                     ->  Gather Motion 3:1  (slice2; segments: 3)
+                           Output: (PARTIAL count(t_select.a))
+                           ->  Partial Aggregate
+                                 Output: PARTIAL count(t_select.a)
+                                 ->  Seq Scan on aqumv.t_select
+                                       Output: t_select.a
+ Settings: enable_answer_query_using_materialized_views = 'off', optimizer = 'off'
+ Optimizer: Postgres query optimizer
+(16 rows)
+
+insert into t_insert select count(a) from t_select;
+select * from t_insert;
+  a   
+------
+ 1000
+(1 row)
+
+truncate t_insert;
+set local enable_answer_query_using_materialized_views = on;
+explain(costs off, verbose) insert into t_insert select count(a) from t_select;
+                                    QUERY PLAN                                    
+----------------------------------------------------------------------------------
+ Insert on aqumv.t_insert
+   ->  Redistribute Motion 3:3  (slice1; segments: 3)
+         Output: (("*SELECT*".count)::integer)
+         Hash Key: (("*SELECT*".count)::integer)
+         ->  Subquery Scan on "*SELECT*"
+               Output: "*SELECT*".count
+               ->  Seq Scan on aqumv.mv_insert_select
+                     Output: mv_insert_select.count
+ Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off'
+ Optimizer: Postgres query optimizer
+(10 rows)
+
+insert into t_insert select count(a) from t_select;
+select * from t_insert;
+  a   
+------
+ 1000
+(1 row)
+
 abort;
 reset optimizer;
 reset enable_answer_query_using_materialized_views;
diff --git a/src/test/regress/sql/aqumv.sql b/src/test/regress/sql/aqumv.sql
@@ -852,6 +852,28 @@ explain(costs off, verbose)
 select count(*) from par_1_prt_2;
 abort;
 
+-- Test INSERT SELECT
+begin; 
+create table t_insert(a int);
+create table t_select(a int);
+insert into t_select select i from generate_series(1, 1000) i;
+analyze t_insert;
+create materialized view mv_insert_select as
+select count(a) from t_select;
+analyze mv_insert_select;
+
+set local enable_answer_query_using_materialized_views = off;
+explain(costs off, verbose) insert into t_insert select count(a) from t_select;
+insert into t_insert select count(a) from t_select;
+select * from t_insert;
+truncate t_insert;
+
+set local enable_answer_query_using_materialized_views = on;
+explain(costs off, verbose) insert into t_insert select count(a) from t_select;
+insert into t_insert select count(a) from t_select;
+select * from t_insert;
+abort;
+
 reset optimizer;
 reset enable_answer_query_using_materialized_views;
 -- start_ignore