Skip to content

Commit 2ec12ac

Browse files
committed
fix: merge into split logic
for merge-into stmt with both matched and not-matched branch, right join is used, after got the joined result-set, a column of the result-set is chosen as the "split" column, i.e. for rows of the split column, those are nulls are recognized as no-matched, and others as matched. before this PR, an arbitrary column of target table is picked as the "split" column, which is unsafe, since the "arbitrary column" may have NULL values originally, which may lead to incorrectly treat a matched row as unmatched, and unexpected result (data duplication).
1 parent 28c543e commit 2ec12ac

File tree

4 files changed

+31
-17
lines changed

4 files changed

+31
-17
lines changed

src/query/service/src/interpreters/interpreter_merge_into.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,7 @@ impl MergeIntoInterpreter {
227227

228228
let insert_only = matches!(merge_type, MergeIntoType::InsertOnly);
229229

230-
let mut row_id_idx = if !insert_only && !target_build_optimization {
230+
let mut row_id_idx = if !insert_only {
231231
match meta_data
232232
.read()
233233
.row_id_index_by_table_index(*target_table_idx)

src/query/sql/src/planner/binder/merge_into.rs

+2-15
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@ use crate::IndexType;
5858
use crate::ScalarBinder;
5959
use crate::ScalarExpr;
6060
use crate::Visibility;
61-
use crate::DUMMY_COLUMN_INDEX;
6261

6362
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
6463
pub enum MergeIntoType {
@@ -438,20 +437,8 @@ impl Binder {
438437
.await?,
439438
);
440439
}
441-
let mut split_idx = DUMMY_COLUMN_INDEX;
442-
// find any target table column index for merge_into_split
443-
for column in self
444-
.metadata
445-
.read()
446-
.columns_by_table_index(table_index)
447-
.iter()
448-
{
449-
if column.index() != row_id_index {
450-
split_idx = column.index();
451-
break;
452-
}
453-
}
454-
assert!(split_idx != DUMMY_COLUMN_INDEX);
440+
441+
let split_idx = row_id_index;
455442

456443
Ok(MergeInto {
457444
catalog: catalog_name.to_string(),

src/query/sql/src/planner/optimizer/optimizer.rs

-1
Original file line numberDiff line numberDiff line change
@@ -494,7 +494,6 @@ async fn optimize_merge_into(opt_ctx: OptimizerContext, plan: Box<MergeInto>) ->
494494
== 0
495495
&& flag
496496
{
497-
new_columns_set.remove(&plan.row_id_index);
498497
opt_ctx.table_ctx.set_merge_into_join(MergeIntoJoin {
499498
merge_into_join_type: MergeIntoJoinType::Left,
500499
is_distributed: false,
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
statement ok
2+
create or replace database m_test;
3+
4+
statement ok
5+
use m_test;
6+
7+
statement ok
8+
create table t(a string, b string, c string, d string, k string);
9+
10+
statement ok
11+
create table s(a string, b string, c string, d string, k string);
12+
13+
statement ok
14+
insert into t(k) values('k');
15+
16+
statement ok
17+
insert into s(k) values('k');
18+
19+
20+
query II
21+
merge into t using s on t.k = s.k when matched then update * when not matched then insert *;
22+
----
23+
0 1
24+
25+
query TTTTT
26+
select * from t;
27+
----
28+
NULL NULL NULL NULL k

0 commit comments

Comments
 (0)