Skip to content

Commit 3b05cf1

Browse files
committed
[parquet] Simplify row ranges when reading empty columns
1 parent c1d5ad1 commit 3b05cf1

File tree

1 file changed

+9
-22
lines changed

1 file changed

+9
-22
lines changed

paimon-format/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java

Lines changed: 9 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,6 @@
9898
import java.util.Map;
9999
import java.util.Map.Entry;
100100
import java.util.Objects;
101-
import java.util.Optional;
102101
import java.util.Set;
103102
import java.util.concurrent.TimeUnit;
104103
import java.util.concurrent.TimeoutException;
@@ -825,33 +824,21 @@ private RowRanges getRowRanges(int blockIndex) {
825824
}
826825

827826
private RowRanges calculateRowRanges(int blockIndex, RoaringBitmap32 selection) {
828-
List<OffsetIndex> offsets;
829827
BlockMetaData block = blocks.get(blockIndex);
830-
if (paths.isEmpty()) {
831-
Optional<ColumnChunkMetaData> first = block.getColumns().stream().findFirst();
832-
if (first.isPresent()) {
833-
ColumnPath path = first.get().getPath();
834-
OffsetIndex index =
835-
ColumnIndexStoreImpl.create(this, block, Collections.singleton(path))
836-
.getOffsetIndex(path);
837-
offsets = Collections.singletonList(index);
838-
} else {
839-
offsets = Collections.emptyList();
840-
}
841-
} else {
842-
ColumnIndexStore store = getColumnIndexStore(blockIndex);
843-
offsets =
844-
paths.keySet().stream().map(store::getOffsetIndex).collect(Collectors.toList());
845-
}
828+
ColumnIndexStore store = getColumnIndexStore(blockIndex);
829+
List<OffsetIndex> offsets =
830+
paths.keySet().stream()
831+
.map(store::getOffsetIndex)
832+
.filter(Objects::nonNull)
833+
.collect(Collectors.toList());
846834

847835
long rowCount = block.getRowCount();
848836
long rowIndexOffset = block.getRowIndexOffset();
849837
RowRanges rowRanges = RowRanges.createSingle(rowCount);
850838
for (OffsetIndex offset : offsets) {
851-
if (offset != null) {
852-
RowRanges result = RowRanges.create(rowCount, rowIndexOffset, offset, selection);
853-
rowRanges = RowRanges.intersection(result, rowRanges);
854-
}
839+
// avoid creating too many ranges; just filter the columns' pages
840+
RowRanges result = RowRanges.create(rowCount, rowIndexOffset, offset, selection);
841+
rowRanges = RowRanges.intersection(result, rowRanges);
855842
}
856843

857844
return rowRanges;

0 commit comments

Comments
 (0)