Skip to content

Commit eac2798

Browse files
authored
[core] Rename 'deletion-vectors.version' to 'deletion-vectors.bitmap64' (apache#5552)
1 parent 601579b commit eac2798

File tree

25 files changed

+315
-441
lines changed

25 files changed

+315
-441
lines changed

docs/layouts/shortcodes/generated/core_configuration.html

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -297,16 +297,16 @@
297297
<td>The target size of deletion vector index file.</td>
298298
</tr>
299299
<tr>
300-
<td><h5>deletion-vectors.enabled</h5></td>
300+
<td><h5>deletion-vectors.bitmap64</h5></td>
301301
<td style="word-wrap: break-word;">false</td>
302302
<td>Boolean</td>
303-
<td>Whether to enable deletion vectors mode. In this mode, index files containing deletion vectors are generated when data is written, which marks the data for deletion. During read operations, by applying these index files, merging can be avoided.</td>
303+
<td>Enable 64 bit bitmap implementation. Note that only 64 bit bitmap implementation is compatible with Iceberg.</td>
304304
</tr>
305305
<tr>
306-
<td><h5>deletion-vectors.version</h5></td>
307-
<td style="word-wrap: break-word;">1</td>
308-
<td>Integer</td>
309-
<td>The version of deletion vector, currently support v1 and v2, default version is 1.</td>
306+
<td><h5>deletion-vectors.enabled</h5></td>
307+
<td style="word-wrap: break-word;">false</td>
308+
<td>Boolean</td>
309+
<td>Whether to enable deletion vectors mode. In this mode, index files containing deletion vectors are generated when data is written, which marks the data for deletion. During read operations, by applying these index files, merging can be avoided.</td>
310310
</tr>
311311
<tr>
312312
<td><h5>dynamic-bucket.assigner-parallelism</h5></td>

docs/static/rest-catalog-open-api.yaml

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -232,6 +232,11 @@ paths:
232232
in: query
233233
schema:
234234
type: string
235+
- name: tableNamePattern
236+
description: A sql LIKE pattern (% and _) for table names.
237+
in: query
238+
schema:
239+
type: string
235240
responses:
236241
"200":
237242
description: OK
@@ -305,6 +310,11 @@ paths:
305310
in: query
306311
schema:
307312
type: string
313+
- name: tableNamePattern
314+
description: A sql LIKE pattern (% and _) for table names.
315+
in: query
316+
schema:
317+
type: string
308318
responses:
309319
"200":
310320
description: OK
@@ -658,6 +668,11 @@ paths:
658668
in: query
659669
schema:
660670
type: string
671+
- name: partitionNamePattern
672+
description: A sql LIKE pattern (% and _) for partition names.
673+
in: query
674+
schema:
675+
type: string
661676
responses:
662677
"200":
663678
description: OK
@@ -896,6 +911,11 @@ paths:
896911
in: query
897912
schema:
898913
type: string
914+
- name: viewNamePattern
915+
description: A sql LIKE pattern (% and _) for view names.
916+
in: query
917+
schema:
918+
type: string
899919
responses:
900920
"200":
901921
description: OK
@@ -969,6 +989,11 @@ paths:
969989
in: query
970990
schema:
971991
type: string
992+
- name: viewNamePattern
993+
description: A sql LIKE pattern (% and _) for view names.
994+
in: query
995+
schema:
996+
type: string
972997
responses:
973998
"200":
974999
description: OK

paimon-common/src/main/java/org/apache/paimon/CoreOptions.java

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1510,12 +1510,12 @@ public class CoreOptions implements Serializable {
15101510
.defaultValue(MemorySize.ofMebiBytes(2))
15111511
.withDescription("The target size of deletion vector index file.");
15121512

1513-
public static final ConfigOption<Integer> DELETION_VECTOR_VERSION =
1514-
key("deletion-vectors.version")
1515-
.intType()
1516-
.defaultValue(1)
1513+
public static final ConfigOption<Boolean> DELETION_VECTOR_BITMAP64 =
1514+
key("deletion-vectors.bitmap64")
1515+
.booleanType()
1516+
.defaultValue(false)
15171517
.withDescription(
1518-
"The version of deletion vector, currently support v1 and v2, default version is 1.");
1518+
"Enable 64 bit bitmap implementation. Note that only 64 bit bitmap implementation is compatible with Iceberg.");
15191519

15201520
public static final ConfigOption<Boolean> DELETION_FORCE_PRODUCE_CHANGELOG =
15211521
key("delete.force-produce-changelog")
@@ -2629,8 +2629,8 @@ public MemorySize deletionVectorIndexFileTargetSize() {
26292629
return options.get(DELETION_VECTOR_INDEX_FILE_TARGET_SIZE);
26302630
}
26312631

2632-
public int deletionVectorVersion() {
2633-
return options.get(DELETION_VECTOR_VERSION);
2632+
public boolean deletionVectorBitmap64() {
2633+
return options.get(DELETION_VECTOR_BITMAP64);
26342634
}
26352635

26362636
public FileIndexOptions indexColumnsOptions() {

paimon-core/src/main/java/org/apache/paimon/AbstractFileStore.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -243,7 +243,7 @@ public IndexFileHandler newIndexFileHandler() {
243243
bucketMode() == BucketMode.BUCKET_UNAWARE
244244
? options.deletionVectorIndexFileTargetSize()
245245
: MemorySize.ofBytes(Long.MAX_VALUE),
246-
options.deletionVectorVersion()));
246+
options.deletionVectorBitmap64()));
247247
}
248248

249249
@Override

paimon-core/src/main/java/org/apache/paimon/deletionvectors/Bitmap64DeletionVector.java

Lines changed: 5 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,8 @@
2222
import org.apache.paimon.utils.Preconditions;
2323
import org.apache.paimon.utils.RoaringBitmap32;
2424

25+
import java.io.DataOutputStream;
26+
import java.io.IOException;
2527
import java.nio.ByteBuffer;
2628
import java.nio.ByteOrder;
2729
import java.util.Objects;
@@ -35,8 +37,6 @@
3537
*/
3638
public class Bitmap64DeletionVector implements DeletionVector {
3739

38-
public static final int VERSION = 2;
39-
4040
public static final int MAGIC_NUMBER = 1681511377;
4141
public static final int LENGTH_SIZE_BYTES = 4;
4242
public static final int CRC_SIZE_BYTES = 4;
@@ -90,12 +90,7 @@ public long getCardinality() {
9090
}
9191

9292
@Override
93-
public int version() {
94-
return VERSION;
95-
}
96-
97-
@Override
98-
public byte[] serializeToBytes() {
93+
public int serializeTo(DataOutputStream out) throws IOException {
9994
roaringBitmap.runLengthEncode(); // run-length encode the bitmap before serializing
10095
int bitmapDataLength = computeBitmapDataLength(roaringBitmap); // magic bytes + bitmap
10196
byte[] bytes = new byte[LENGTH_SIZE_BYTES + bitmapDataLength + CRC_SIZE_BYTES];
@@ -106,18 +101,8 @@ public byte[] serializeToBytes() {
106101
int crc = computeChecksum(bytes, bitmapDataLength);
107102
buffer.putInt(crcOffset, crc);
108103
buffer.rewind();
109-
return bytes;
110-
}
111-
112-
public static DeletionVector deserializeFromBytes(byte[] bytes) {
113-
ByteBuffer buffer = ByteBuffer.wrap(bytes);
114-
int bitmapDataLength = readBitmapDataLength(buffer, bytes.length);
115-
OptimizedRoaringBitmap64 bitmap = deserializeBitmap(bytes, bitmapDataLength);
116-
int crc = computeChecksum(bytes, bitmapDataLength);
117-
int crcOffset = LENGTH_SIZE_BYTES + bitmapDataLength;
118-
int expectedCrc = buffer.getInt(crcOffset);
119-
Preconditions.checkArgument(crc == expectedCrc, "Invalid CRC");
120-
return new Bitmap64DeletionVector(bitmap);
104+
out.write(bytes);
105+
return bytes.length;
121106
}
122107

123108
public static DeletionVector deserializeFromBitmapDataBytes(byte[] bytes) {
@@ -182,7 +167,6 @@ private static int computeChecksum(byte[] bytes, int bitmapDataLength) {
182167

183168
protected static int toLittleEndianInt(int bigEndianInt) {
184169
byte[] bytes = ByteBuffer.allocate(4).putInt(bigEndianInt).array();
185-
186170
return ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN).getInt();
187171
}
188172

paimon-core/src/main/java/org/apache/paimon/deletionvectors/BitmapDeletionVector.java

Lines changed: 14 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -25,15 +25,14 @@
2525
import java.io.IOException;
2626
import java.nio.ByteBuffer;
2727
import java.util.Objects;
28+
import java.util.zip.CRC32;
2829

2930
/**
3031
* A {@link DeletionVector} based on {@link RoaringBitmap32}, it only supports files with row count
3132
* not exceeding {@link RoaringBitmap32#MAX_VALUE}.
3233
*/
3334
public class BitmapDeletionVector implements DeletionVector {
3435

35-
public static final int VERSION = 1;
36-
3736
public static final int MAGIC_NUMBER = 1581511376;
3837
public static final int MAGIC_NUMBER_SIZE_BYTES = 4;
3938

@@ -85,36 +84,22 @@ public long getCardinality() {
8584
}
8685

8786
@Override
88-
public int version() {
89-
return VERSION;
90-
}
91-
92-
@Override
93-
public byte[] serializeToBytes() {
87+
public int serializeTo(DataOutputStream out) {
9488
try (ByteArrayOutputStream bos = new ByteArrayOutputStream();
9589
DataOutputStream dos = new DataOutputStream(bos)) {
9690
dos.writeInt(MAGIC_NUMBER);
9791
roaringBitmap.serialize(dos);
98-
return bos.toByteArray();
92+
byte[] data = bos.toByteArray();
93+
int size = data.length;
94+
out.writeInt(size);
95+
out.write(data);
96+
out.writeInt(calculateChecksum(data));
97+
return size;
9998
} catch (Exception e) {
10099
throw new RuntimeException("Unable to serialize deletion vector", e);
101100
}
102101
}
103102

104-
public static DeletionVector deserializeFromBytes(byte[] bytes) {
105-
try {
106-
ByteBuffer buffer = ByteBuffer.wrap(bytes);
107-
int magicNum = buffer.getInt();
108-
if (magicNum == MAGIC_NUMBER) {
109-
return deserializeFromByteBuffer(buffer);
110-
} else {
111-
throw new RuntimeException("Invalid magic number: " + magicNum);
112-
}
113-
} catch (IOException e) {
114-
throw new RuntimeException("Unable to deserialize deletion vector", e);
115-
}
116-
}
117-
118103
/**
119104
* Note: the result is read only, do not call any modify operation outside.
120105
*
@@ -153,4 +138,10 @@ public boolean equals(Object o) {
153138
public int hashCode() {
154139
return Objects.hashCode(roaringBitmap);
155140
}
141+
142+
public static int calculateChecksum(byte[] bytes) {
143+
CRC32 crc = new CRC32();
144+
crc.update(bytes);
145+
return (int) crc.getValue();
146+
}
156147
}

0 commit comments

Comments
 (0)