Skip to content

Commit 0e7b78c

Browse files
committed
[core] Refactor LookupChangelogMergeFunctionWrapper to better readability
1 parent c933bfd commit 0e7b78c

File tree

3 files changed

+87
-63
lines changed

3 files changed

+87
-63
lines changed

paimon-core/src/main/java/org/apache/paimon/mergetree/compact/LookupChangelogMergeFunctionWrapper.java

Lines changed: 43 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@
3030

3131
import javax.annotation.Nullable;
3232

33+
import java.util.ArrayList;
3334
import java.util.Comparator;
34-
import java.util.Iterator;
3535
import java.util.LinkedList;
3636
import java.util.List;
3737
import java.util.function.Function;
@@ -56,90 +56,78 @@
5656
public class LookupChangelogMergeFunctionWrapper<T>
5757
implements MergeFunctionWrapper<ChangelogResult> {
5858

59-
private final MergeFunction<KeyValue> mergeFunction;
59+
private final LookupMergeFunction mergeFunction;
6060
private final Function<InternalRow, T> lookup;
6161

6262
private final ChangelogResult reusedResult = new ChangelogResult();
6363
private final KeyValue reusedBefore = new KeyValue();
6464
private final KeyValue reusedAfter = new KeyValue();
6565
@Nullable private final RecordEqualiser valueEqualiser;
6666
private final LookupStrategy lookupStrategy;
67-
private final @Nullable DeletionVectorsMaintainer deletionVectorsMaintainer;
67+
private final @Nullable DeletionVectorsMaintainer dvMaintainer;
6868
private final Comparator<KeyValue> comparator;
6969

70-
private final LinkedList<KeyValue> candidates = new LinkedList<>();
71-
7270
public LookupChangelogMergeFunctionWrapper(
7371
MergeFunctionFactory<KeyValue> mergeFunctionFactory,
7472
Function<InternalRow, T> lookup,
7573
@Nullable RecordEqualiser valueEqualiser,
7674
LookupStrategy lookupStrategy,
77-
@Nullable DeletionVectorsMaintainer deletionVectorsMaintainer,
75+
@Nullable DeletionVectorsMaintainer dvMaintainer,
7876
@Nullable UserDefinedSeqComparator userDefinedSeqComparator) {
77+
if (lookupStrategy.deletionVector) {
78+
checkArgument(
79+
dvMaintainer != null,
80+
"deletionVectorsMaintainer should not be null, there is a bug.");
81+
}
7982
MergeFunction<KeyValue> mergeFunction = mergeFunctionFactory.create();
8083
checkArgument(
8184
mergeFunction instanceof LookupMergeFunction,
8285
"Merge function should be a LookupMergeFunction, but is %s, there is a bug.",
8386
mergeFunction.getClass().getName());
84-
if (lookupStrategy.deletionVector) {
85-
checkArgument(
86-
deletionVectorsMaintainer != null,
87-
"deletionVectorsMaintainer should not be null, there is a bug.");
88-
}
89-
this.mergeFunction = mergeFunctionFactory.create();
87+
this.mergeFunction = (LookupMergeFunction) mergeFunction;
9088
this.lookup = lookup;
9189
this.valueEqualiser = valueEqualiser;
9290
this.lookupStrategy = lookupStrategy;
93-
this.deletionVectorsMaintainer = deletionVectorsMaintainer;
91+
this.dvMaintainer = dvMaintainer;
9492
this.comparator = createSequenceComparator(userDefinedSeqComparator);
9593
}
9694

9795
@Override
9896
public void reset() {
99-
candidates.clear();
97+
mergeFunction.reset();
10098
}
10199

102100
@Override
103101
public void add(KeyValue kv) {
104-
candidates.add(kv);
102+
mergeFunction.add(kv);
105103
}
106104

107105
@Override
108106
public ChangelogResult getResult() {
109-
// 1. Compute the latest high level record and containLevel0 of candidates
110-
Iterator<KeyValue> descending = candidates.descendingIterator();
111-
KeyValue highLevel = null;
112-
boolean containLevel0 = false;
113-
while (descending.hasNext()) {
114-
KeyValue kv = descending.next();
115-
if (kv.level() > 0) {
116-
descending.remove();
117-
if (highLevel == null || kv.level() < highLevel.level()) {
118-
highLevel = kv;
119-
}
120-
} else {
121-
containLevel0 = true;
122-
}
123-
}
107+
// 1. Find the latest high level record and compute containLevel0
108+
KeyValue highLevel = mergeFunction.pickHighLevel();
109+
boolean containLevel0 = mergeFunction.containLevel0();
124110

125111
// 2. Lookup if latest high level record is absent
126112
if (highLevel == null) {
127-
InternalRow lookupKey = candidates.get(0).key();
128-
T lookupResult = lookup.apply(lookupKey);
113+
T lookupResult = lookup.apply(mergeFunction.key());
129114
if (lookupResult != null) {
130115
if (lookupStrategy.deletionVector) {
131116
PositionedKeyValue positionedKeyValue = (PositionedKeyValue) lookupResult;
132117
highLevel = positionedKeyValue.keyValue();
133-
deletionVectorsMaintainer.notifyNewDeletion(
118+
dvMaintainer.notifyNewDeletion(
134119
positionedKeyValue.fileName(), positionedKeyValue.rowPosition());
135120
} else {
136121
highLevel = (KeyValue) lookupResult;
137122
}
138123
}
124+
if (highLevel != null) {
125+
insertInto(mergeFunction.candidates(), highLevel);
126+
}
139127
}
140128

141129
// 3. Calculate result
142-
KeyValue result = calculateResult(candidates, highLevel);
130+
KeyValue result = mergeFunction.getResult();
143131

144132
// 4. Set changelog when there's level-0 records
145133
reusedResult.reset();
@@ -150,21 +138,22 @@ public ChangelogResult getResult() {
150138
return reusedResult.setResult(result);
151139
}
152140

153-
private KeyValue calculateResult(List<KeyValue> candidates, @Nullable KeyValue highLevel) {
154-
mergeFunction.reset();
141+
private void insertInto(LinkedList<KeyValue> candidates, KeyValue highLevel) {
142+
List<KeyValue> newCandidates = new ArrayList<>();
155143
for (KeyValue candidate : candidates) {
156144
if (highLevel != null && comparator.compare(highLevel, candidate) < 0) {
157-
mergeFunction.add(highLevel);
158-
mergeFunction.add(candidate);
145+
newCandidates.add(highLevel);
146+
newCandidates.add(candidate);
159147
highLevel = null;
160148
} else {
161-
mergeFunction.add(candidate);
149+
newCandidates.add(candidate);
162150
}
163151
}
164152
if (highLevel != null) {
165-
mergeFunction.add(highLevel);
153+
newCandidates.add(highLevel);
166154
}
167-
return mergeFunction.getResult();
155+
candidates.clear();
156+
candidates.addAll(newCandidates);
168157
}
169158

170159
private void setChangelog(@Nullable KeyValue before, KeyValue after) {
@@ -197,16 +186,21 @@ private KeyValue replace(KeyValue reused, RowKind valueKind, KeyValue from) {
197186
}
198187

199188
private Comparator<KeyValue> createSequenceComparator(
200-
@Nullable FieldsComparator userDefinedSeqComparator) {
201-
if (userDefinedSeqComparator == null) {
202-
return Comparator.comparingLong(KeyValue::sequenceNumber);
203-
}
204-
189+
@Nullable FieldsComparator udsComparator) {
205190
return (o1, o2) -> {
206-
int result = userDefinedSeqComparator.compare(o1.value(), o2.value());
207-
if (result != 0) {
208-
return result;
191+
// For high-level comparison logic (not involving Level 0), only the value of the
192+
// minimum Level should be selected
193+
if (o1.level() > 0 && o2.level() > 0 && o1.level() != o2.level()) {
194+
return Integer.compare(o2.level(), o1.level());
195+
}
196+
197+
if (udsComparator != null) {
198+
int result = udsComparator.compare(o1.value(), o2.value());
199+
if (result != 0) {
200+
return result;
201+
}
209202
}
203+
210204
return Long.compare(o1.sequenceNumber(), o2.sequenceNumber());
211205
};
212206
}

paimon-core/src/main/java/org/apache/paimon/mergetree/compact/LookupMergeFunction.java

Lines changed: 31 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
package org.apache.paimon.mergetree.compact;
2020

2121
import org.apache.paimon.KeyValue;
22+
import org.apache.paimon.data.InternalRow;
2223

2324
import javax.annotation.Nullable;
2425

@@ -48,29 +49,45 @@ public void add(KeyValue kv) {
4849
candidates.add(kv);
4950
}
5051

51-
@Override
52-
public KeyValue getResult() {
53-
// 1. Find the latest high level record
54-
KeyValue keptHighLevel = null;
55-
LinkedList<KeyValue> highLevels = new LinkedList<>();
56-
52+
@Nullable
53+
public KeyValue pickHighLevel() {
54+
KeyValue highLevel = null;
5755
for (KeyValue kv : candidates) {
5856
if (kv.level() > 0) {
59-
highLevels.add(kv);
60-
if (keptHighLevel == null || kv.level() < keptHighLevel.level()) {
61-
keptHighLevel = kv;
57+
if (highLevel == null || kv.level() < highLevel.level()) {
58+
highLevel = kv;
6259
}
6360
}
6461
}
62+
return highLevel;
63+
}
6564

66-
if (highLevels.size() > 1) {
67-
highLevels.remove(keptHighLevel);
68-
candidates.removeAll(highLevels);
65+
public boolean containLevel0() {
66+
for (KeyValue kv : candidates) {
67+
if (kv.level() == 0) {
68+
return true;
69+
}
6970
}
71+
return false;
72+
}
73+
74+
public InternalRow key() {
75+
return candidates.get(0).key();
76+
}
77+
78+
public LinkedList<KeyValue> candidates() {
79+
return candidates;
80+
}
7081

71-
// 2. Do the merge for inputs
82+
@Override
83+
public KeyValue getResult() {
7284
mergeFunction.reset();
73-
candidates.forEach(mergeFunction::add);
85+
KeyValue highLevel = pickHighLevel();
86+
for (KeyValue kv : candidates) {
87+
if (kv.level() == 0 || kv == highLevel) {
88+
mergeFunction.add(kv);
89+
}
90+
}
7491
return mergeFunction.getResult();
7592
}
7693

paimon-core/src/test/java/org/apache/paimon/mergetree/compact/LookupChangelogMergeFunctionWrapperTest.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -547,4 +547,17 @@ public void testKeepLowestHighLevel() {
547547
kv = result.result();
548548
assertThat(kv.value().getInt(0)).isEqualTo(3);
549549
}
550+
551+
@Test
552+
public void testKeepLowestHighLevelLookupFunction() {
553+
LookupMergeFunction function =
554+
(LookupMergeFunction)
555+
LookupMergeFunction.wrap(DeduplicateMergeFunction.factory()).create();
556+
function.reset();
557+
function.add(new KeyValue().replace(row(1), 1, INSERT, row(2)).setLevel(1));
558+
function.add(new KeyValue().replace(row(1), 1, INSERT, row(1)).setLevel(2));
559+
KeyValue kv = function.getResult();
560+
assertThat(kv).isNotNull();
561+
assertThat(kv.value().getInt(0)).isEqualTo(2);
562+
}
550563
}

0 commit comments

Comments
 (0)