|
39 | 39 | import org.apache.paimon.types.DataTypes; |
40 | 40 | import org.apache.paimon.utils.FunctionWithIOException; |
41 | 41 | import org.apache.paimon.utils.InternalRowUtils; |
| 42 | +import org.apache.paimon.utils.RangeHelper; |
42 | 43 | import org.apache.paimon.utils.SerializationUtils; |
43 | 44 |
|
44 | 45 | import javax.annotation.Nullable; |
@@ -144,29 +145,61 @@ public long rowCount() { |
144 | 145 |
|
145 | 146 | @Override |
146 | 147 | public OptionalLong mergedRowCount() { |
147 | | - if (!rawConvertible |
148 | | - || (dataDeletionFiles != null |
149 | | - && !dataDeletionFiles.stream() |
150 | | - .allMatch(f -> f == null || f.cardinality() != null))) { |
151 | | - return OptionalLong.empty(); |
| 148 | + if (rawMergedRowCountAvailable()) { |
| 149 | + return OptionalLong.of(rawMergedRowCount()); |
152 | 150 | } |
| 151 | + if (dataEvolutionRowCountAvailable()) { |
| 152 | + return OptionalLong.of(dataEvolutionMergedRowCount()); |
| 153 | + } |
| 154 | + return OptionalLong.empty(); |
| 155 | + } |
153 | 156 |
|
| 157 | + private boolean rawMergedRowCountAvailable() { |
| 158 | + return rawConvertible |
| 159 | + && (dataDeletionFiles == null |
| 160 | + || dataDeletionFiles.stream() |
| 161 | + .allMatch(f -> f == null || f.cardinality() != null)); |
| 162 | + } |
| 163 | + |
| 164 | + private long rawMergedRowCount() { |
154 | 165 | long sum = 0L; |
155 | | - List<RawFile> rawFiles = convertToRawFiles().orElse(null); |
156 | | - if (rawFiles != null) { |
157 | | - for (int i = 0; i < rawFiles.size(); i++) { |
158 | | - RawFile rawFile = rawFiles.get(i); |
159 | | - DeletionFile deletionFile = |
160 | | - dataDeletionFiles == null ? null : dataDeletionFiles.get(i); |
161 | | - Long cardinality = deletionFile == null ? null : deletionFile.cardinality(); |
162 | | - if (deletionFile == null) { |
163 | | - sum += rawFile.rowCount(); |
164 | | - } else if (cardinality != null) { |
165 | | - sum += rawFile.rowCount() - cardinality; |
166 | | - } |
| 166 | + for (int i = 0; i < dataFiles.size(); i++) { |
| 167 | + DataFileMeta file = dataFiles.get(i); |
| 168 | + DeletionFile deletionFile = dataDeletionFiles == null ? null : dataDeletionFiles.get(i); |
| 169 | + Long cardinality = deletionFile == null ? null : deletionFile.cardinality(); |
| 170 | + if (deletionFile == null) { |
| 171 | + sum += file.rowCount(); |
| 172 | + } else if (cardinality != null) { |
| 173 | + sum += file.rowCount() - cardinality; |
167 | 174 | } |
168 | 175 | } |
169 | | - return OptionalLong.of(sum); |
| 176 | + return sum; |
| 177 | + } |
| 178 | + |
| 179 | + private boolean dataEvolutionRowCountAvailable() { |
| 180 | + for (DataFileMeta file : dataFiles) { |
| 181 | + if (file.firstRowId() == null) { |
| 182 | + return false; |
| 183 | + } |
| 184 | + } |
| 185 | + return true; |
| 186 | + } |
| 187 | + |
| 188 | + private long dataEvolutionMergedRowCount() { |
| 189 | + long sum = 0L; |
| 190 | + RangeHelper<DataFileMeta> rangeHelper = |
| 191 | + new RangeHelper<>( |
| 192 | + DataFileMeta::nonNullFirstRowId, |
| 193 | + f -> f.nonNullFirstRowId() + f.rowCount() - 1); |
| 194 | + List<List<DataFileMeta>> ranges = rangeHelper.mergeOverlappingRanges(dataFiles); |
| 195 | + for (List<DataFileMeta> group : ranges) { |
| 196 | + long maxCount = 0; |
| 197 | + for (DataFileMeta file : group) { |
| 198 | + maxCount = Math.max(maxCount, file.rowCount()); |
| 199 | + } |
| 200 | + sum += maxCount; |
| 201 | + } |
| 202 | + return sum; |
170 | 203 | } |
171 | 204 |
|
172 | 205 | public Object minValue(int fieldIndex, DataField dataField, SimpleStatsEvolutions evolutions) { |
|
0 commit comments