Skip to content

Commit 976bb84

Browse files
committed
[core][python] Support null bitmap in Python BTree
1 parent 47e7801 commit 976bb84

File tree

6 files changed

+339
-30
lines changed

6 files changed

+339
-30
lines changed

paimon-common/src/main/java/org/apache/paimon/globalindex/btree/LazyFilteredBTreeReader.java

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@
4242
public class LazyFilteredBTreeReader implements GlobalIndexReader {
4343

4444
private final BTreeFileMetaSelector fileSelector;
45-
private final List<GlobalIndexIOMeta> files;
4645
private final Map<Path, GlobalIndexReader> readerCache;
4746
private final KeySerializer keySerializer;
4847
private final CacheManager cacheManager;
@@ -58,7 +57,6 @@ public LazyFilteredBTreeReader(
5857
this.cacheManager = cacheManager;
5958
this.fileReader = fileReader;
6059
this.keySerializer = keySerializer;
61-
this.files = files;
6260
}
6361

6462
@Override

paimon-core/src/main/java/org/apache/paimon/utils/MutableObjectIteratorAdapter.java

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,9 @@ public class MutableObjectIteratorAdapter<I extends E, E> implements Iterator<E>
4242

4343
private final MutableObjectIterator<I> delegate;
4444
private final I instance;
45+
4546
private E nextElement;
46-
private boolean hasNext = false;
47-
private boolean initialized = false;
47+
private boolean prefetched = false;
4848

4949
/**
5050
* Creates a new adapter wrapping the given {@link MutableObjectIterator}.
@@ -58,33 +58,26 @@ public MutableObjectIteratorAdapter(MutableObjectIterator<I> delegate, I instanc
5858

5959
@Override
6060
public boolean hasNext() {
61-
if (!initialized) {
61+
if (!prefetched) {
6262
prefetch();
6363
}
64-
return hasNext;
64+
return nextElement != null;
6565
}
6666

6767
@Override
6868
public E next() {
6969
if (!hasNext()) {
7070
throw new NoSuchElementException();
7171
}
72-
E result = nextElement;
73-
prefetch();
74-
return result;
72+
prefetched = false;
73+
return nextElement;
7574
}
7675

77-
/**
78-
* Prefetches the next element from the delegate iterator.
79-
*
80-
* <p>This method reads ahead one element to support the {@link #hasNext()} check required by
81-
* the standard {@link Iterator} interface.
82-
*/
76+
/** Prefetches the next element from the delegate iterator. */
8377
private void prefetch() {
8478
try {
8579
nextElement = delegate.next(instance);
86-
hasNext = (nextElement != null);
87-
initialized = true;
80+
prefetched = true;
8881
} catch (IOException e) {
8982
throw new RuntimeException("Failed to read next element from MutableObjectIterator", e);
9083
}

paimon-core/src/test/java/org/apache/paimon/JavaPyE2ETest.java

Lines changed: 70 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,7 @@ public void testBtreeIndexWrite() throws Exception {
416416
testBtreeIndexWriteInt();
417417
testBtreeIndexWriteBigInt();
418418
testBtreeIndexWriteLarge();
419+
testBtreeIndexWriteNull();
419420
}
420421

421422
private void testBtreeIndexWriteString() throws Exception {
@@ -436,7 +437,7 @@ private void testBtreeIndexWriteBigInt() throws Exception {
436437
DataTypes.BIGINT(), "test_btree_index_bigint", 1000L, 2000L, 3000L);
437438
}
438439

439-
private <T> void testBtreeIndexWriteGeneric(
440+
private void testBtreeIndexWriteGeneric(
440441
DataType keyType, String tableName, Object key1, Object key2, Object key3)
441442
throws Exception {
442443
// create table
@@ -569,6 +570,74 @@ private void testBtreeIndexWriteLarge() throws Exception {
569570
assertThat(result).containsOnly("v2");
570571
}
571572

573+
private void testBtreeIndexWriteNull() throws Exception {
574+
// create table
575+
RowType rowType =
576+
RowType.of(
577+
new DataType[] {DataTypes.STRING(), DataTypes.STRING()},
578+
new String[] {"k", "v"});
579+
Options options = new Options();
580+
Path tablePath = new Path(warehouse.toString() + "/default.db/test_btree_index_null");
581+
options.set(PATH, tablePath.toString());
582+
options.set(ROW_TRACKING_ENABLED, true);
583+
options.set(DATA_EVOLUTION_ENABLED, true);
584+
options.set(GLOBAL_INDEX_ENABLED, true);
585+
TableSchema tableSchema =
586+
SchemaUtils.forceCommit(
587+
new SchemaManager(LocalFileIO.create(), tablePath),
588+
new Schema(
589+
rowType.getFields(),
590+
Collections.emptyList(),
591+
Collections.emptyList(),
592+
options.toMap(),
593+
""));
594+
AppendOnlyFileStoreTable table =
595+
new AppendOnlyFileStoreTable(
596+
FileIOFinder.find(tablePath),
597+
tablePath,
598+
tableSchema,
599+
CatalogEnvironment.empty());
600+
601+
// write data
602+
BatchWriteBuilder writeBuilder = table.newBatchWriteBuilder();
603+
try (BatchTableWrite write = writeBuilder.newWrite();
604+
BatchTableCommit commit = writeBuilder.newCommit()) {
605+
write.write(
606+
GenericRow.of(BinaryString.fromString("k1"), BinaryString.fromString("v1")));
607+
write.write(
608+
GenericRow.of(BinaryString.fromString("k2"), BinaryString.fromString("v2")));
609+
write.write(GenericRow.of(null, BinaryString.fromString("v3")));
610+
write.write(
611+
GenericRow.of(BinaryString.fromString("k4"), BinaryString.fromString("v4")));
612+
write.write(GenericRow.of(null, BinaryString.fromString("v5")));
613+
commit.commit(write.prepareCommit());
614+
}
615+
616+
// build index
617+
BTreeGlobalIndexBuilder builder =
618+
new BTreeGlobalIndexBuilder(table).withIndexType("btree").withIndexField("k");
619+
try (BatchTableCommit commit = writeBuilder.newCommit()) {
620+
commit.commit(builder.build(builder.scan(), IOManager.create(warehouse.toString())));
621+
}
622+
623+
// assert index
624+
List<IndexManifestEntry> indexEntries =
625+
table.indexManifestFileReader().read(table.latestSnapshot().get().indexManifest);
626+
assertThat(indexEntries)
627+
.singleElement()
628+
.matches(entry -> entry.indexFile().rowCount() == 5);
629+
630+
// read index is null
631+
PredicateBuilder predicateBuilder = new PredicateBuilder(table.rowType());
632+
ReadBuilder readBuilder = table.newReadBuilder().withFilter(predicateBuilder.isNull(0));
633+
List<String> result = new ArrayList<>();
634+
readBuilder
635+
.newRead()
636+
.createReader(readBuilder.newScan().plan())
637+
.forEachRemaining(r -> result.add(r.getString(1).toString()));
638+
assertThat(result).containsExactlyInAnyOrder("v3", "v5");
639+
}
640+
572641
// Helper method from TableTestBase
573642
protected Identifier identifier(String tableName) {
574643
return new Identifier(database, tableName);
Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.paimon.utils;
20+
21+
import org.junit.jupiter.api.Test;
22+
23+
import java.io.IOException;
24+
import java.util.ArrayList;
25+
import java.util.Arrays;
26+
import java.util.Collections;
27+
import java.util.List;
28+
import java.util.NoSuchElementException;
29+
30+
import static org.assertj.core.api.Assertions.assertThat;
31+
import static org.assertj.core.api.Assertions.assertThatThrownBy;
32+
33+
/** Tests for {@link MutableObjectIteratorAdapter}. */
34+
class MutableObjectIteratorAdapterTest {
35+
36+
@Test
37+
public void testBasicIteration() {
38+
List<Integer> values = Arrays.asList(1, 2, 3, 4, 5);
39+
MutableObjectIterator<Integer> delegate = createTestIterator(values);
40+
41+
MutableObjectIteratorAdapter<Integer, Integer> adapter =
42+
new MutableObjectIteratorAdapter<>(delegate, 0);
43+
44+
List<Integer> result = new ArrayList<>();
45+
while (adapter.hasNext()) {
46+
result.add(adapter.next());
47+
}
48+
49+
assertThat(result).containsExactly(1, 2, 3, 4, 5);
50+
}
51+
52+
@Test
53+
public void testEmptyIterator() {
54+
List<Integer> values = Collections.emptyList();
55+
MutableObjectIterator<Integer> delegate = createTestIterator(values);
56+
57+
MutableObjectIteratorAdapter<Integer, Integer> adapter =
58+
new MutableObjectIteratorAdapter<>(delegate, 0);
59+
60+
assertThat(adapter.hasNext()).isFalse();
61+
assertThatThrownBy(adapter::next).isInstanceOf(NoSuchElementException.class);
62+
}
63+
64+
@Test
65+
public void testSingleElement() {
66+
List<Integer> values = Collections.singletonList(42);
67+
MutableObjectIterator<Integer> delegate = createTestIterator(values);
68+
69+
MutableObjectIteratorAdapter<Integer, Integer> adapter =
70+
new MutableObjectIteratorAdapter<>(delegate, 0);
71+
72+
assertThat(adapter.hasNext()).isTrue();
73+
assertThat(adapter.next()).isEqualTo(42);
74+
assertThat(adapter.hasNext()).isFalse();
75+
}
76+
77+
@Test
78+
public void testMultipleHasNextCalls() {
79+
List<Integer> values = Arrays.asList(1, 2);
80+
MutableObjectIterator<Integer> delegate = createTestIterator(values);
81+
82+
MutableObjectIteratorAdapter<Integer, Integer> adapter =
83+
new MutableObjectIteratorAdapter<>(delegate, 0);
84+
85+
// Call hasNext multiple times should not advance the iterator
86+
assertThat(adapter.hasNext()).isTrue();
87+
assertThat(adapter.hasNext()).isTrue();
88+
assertThat(adapter.hasNext()).isTrue();
89+
90+
assertThat(adapter.next()).isEqualTo(1);
91+
92+
assertThat(adapter.hasNext()).isTrue();
93+
assertThat(adapter.hasNext()).isTrue();
94+
95+
assertThat(adapter.next()).isEqualTo(2);
96+
assertThat(adapter.hasNext()).isFalse();
97+
}
98+
99+
@Test
100+
public void testNextWithoutHasNext() {
101+
List<Integer> values = Arrays.asList(1, 2, 3);
102+
MutableObjectIterator<Integer> delegate = createTestIterator(values);
103+
104+
MutableObjectIteratorAdapter<Integer, Integer> adapter =
105+
new MutableObjectIteratorAdapter<>(delegate, 0);
106+
107+
// Call next() without calling hasNext() should still work
108+
assertThat(adapter.next()).isEqualTo(1);
109+
assertThat(adapter.next()).isEqualTo(2);
110+
assertThat(adapter.next()).isEqualTo(3);
111+
112+
assertThatThrownBy(adapter::next).isInstanceOf(NoSuchElementException.class);
113+
}
114+
115+
@Test
116+
public void testIOExceptionHandling() {
117+
MutableObjectIterator<Integer> failingDelegate =
118+
new MutableObjectIterator<Integer>() {
119+
private int count = 0;
120+
121+
@Override
122+
public Integer next(Integer reuse) throws IOException {
123+
count++;
124+
if (count == 1) {
125+
return 1;
126+
}
127+
throw new IOException("Test exception");
128+
}
129+
130+
@Override
131+
public Integer next() throws IOException {
132+
return next(null);
133+
}
134+
};
135+
136+
MutableObjectIteratorAdapter<Integer, Integer> adapter =
137+
new MutableObjectIteratorAdapter<>(failingDelegate, 0);
138+
139+
assertThat(adapter.hasNext()).isTrue();
140+
assertThat(adapter.next()).isEqualTo(1);
141+
142+
assertThatThrownBy(adapter::hasNext)
143+
.isInstanceOf(RuntimeException.class)
144+
.hasMessageContaining("Failed to read next element from MutableObjectIterator")
145+
.hasCauseExactlyInstanceOf(IOException.class);
146+
}
147+
148+
@Test
149+
public void testNullElements() {
150+
List<Integer> values = Arrays.asList(1, null, 3);
151+
MutableObjectIterator<Integer> delegate = createTestIterator(values);
152+
153+
MutableObjectIteratorAdapter<Integer, Integer> adapter =
154+
new MutableObjectIteratorAdapter<>(delegate, 0);
155+
156+
assertThat(adapter.hasNext()).isTrue();
157+
assertThat(adapter.next()).isEqualTo(1);
158+
159+
// Null element should be treated as end of iteration
160+
assertThat(adapter.hasNext()).isFalse();
161+
}
162+
163+
@Test
164+
public void testStringType() {
165+
List<String> values = Arrays.asList("hello", "world", "test");
166+
MutableObjectIterator<String> delegate = createTestIterator(values);
167+
168+
MutableObjectIteratorAdapter<String, String> adapter =
169+
new MutableObjectIteratorAdapter<>(delegate, "");
170+
171+
List<String> result = new ArrayList<>();
172+
while (adapter.hasNext()) {
173+
result.add(adapter.next());
174+
}
175+
176+
assertThat(result).containsExactly("hello", "world", "test");
177+
}
178+
179+
@Test
180+
public void testLongType() {
181+
List<Long> values = Arrays.asList(100L, 200L, 300L);
182+
MutableObjectIterator<Long> delegate = createTestIterator(values);
183+
184+
MutableObjectIteratorAdapter<Long, Long> adapter =
185+
new MutableObjectIteratorAdapter<>(delegate, 0L);
186+
187+
List<Long> result = new ArrayList<>();
188+
while (adapter.hasNext()) {
189+
result.add(adapter.next());
190+
}
191+
192+
assertThat(result).containsExactly(100L, 200L, 300L);
193+
}
194+
195+
/**
196+
* Creates a test implementation of MutableObjectIterator that returns elements from the given
197+
* list.
198+
*/
199+
private <T> MutableObjectIterator<T> createTestIterator(List<T> values) {
200+
return new MutableObjectIterator<T>() {
201+
private int index = 0;
202+
203+
@Override
204+
public T next(T reuse) {
205+
if (index >= values.size()) {
206+
return null;
207+
}
208+
return values.get(index++);
209+
}
210+
211+
@Override
212+
public T next() {
213+
return next(null);
214+
}
215+
};
216+
}
217+
}

0 commit comments

Comments
 (0)