Skip to content

Commit 1944e56

Browse files
committed
new serialisation format
1 parent 2954664 commit 1944e56

16 files changed

Lines changed: 686 additions & 649 deletions

File tree

bsi/build.gradle.kts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,11 @@ tasks.test {
1313
systemProperty("kryo.unsafe", "false")
1414
useJUnitPlatform()
1515
failFast = true
16+
17+
// Define the memory requirements of the tests explicitly, to prevent failures in CI for tests that pass locally
18+
minHeapSize = "2G"
19+
maxHeapSize = "2G"
20+
1621
testLogging {
1722
// We exclude 'passed' events
1823
events( "skipped", "failed")

bsi/src/main/java/org/roaringbitmap/bsi/longlong/Roaring64BitmapSliceIndex.java

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -254,24 +254,31 @@ public void deserialize(ByteBuffer buffer) throws IOException {
254254
ebm.deserialize(buffer);
255255
this.ebM = ebm;
256256
// read back
257-
buffer.position(buffer.position() + ebm.getSizeInBytes());
258257
int bitDepth = buffer.getInt();
259258
Roaring64Bitmap[] ba = new Roaring64Bitmap[bitDepth];
260259
for (int i = 0; i < bitDepth; i++) {
261260
Roaring64Bitmap rb = new Roaring64Bitmap();
262261
rb.deserialize(buffer);
263262
ba[i] = rb;
264-
buffer.position(buffer.position() + rb.getSizeInBytes());
265263
}
266264
this.bA = ba;
267265
}
268266

269267
public int serializedSizeInBytes() {
270-
int size = 0;
268+
long size = 0;
271269
for (Roaring64Bitmap rb : this.bA) {
272-
size += rb.getSizeInBytes();
270+
size += rb.serializedSizeInBytes();
271+
}
272+
size += 8 + //minValue
273+
8 + //maxValue
274+
1 + //runOptimized
275+
4 + //bitDepth
276+
this.ebM.serializedSizeInBytes();
277+
if (size <= Integer.MAX_VALUE) {
278+
return (int) size;
279+
} else {
280+
throw new IllegalStateException("the serialized size is larger than Integer.MAX_VALUE");
273281
}
274-
return 8 + 8 + 1 + 4 + this.ebM.getSizeInBytes() + size;
275282
}
276283

277284
/**

roaringbitmap/src/main/java/org/roaringbitmap/art/Art.java

Lines changed: 38 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,18 @@
11
package org.roaringbitmap.art;
22

33
import org.roaringbitmap.ArraysShim;
4+
import org.roaringbitmap.longlong.HighLowContainer;
45
import org.roaringbitmap.longlong.LongUtils;
56

67
import java.io.DataInput;
78
import java.io.DataOutput;
89
import java.io.IOException;
910
import java.nio.ByteBuffer;
11+
import java.nio.ByteOrder;
1012
import java.util.Arrays;
1113

14+
import static java.nio.ByteOrder.LITTLE_ENDIAN;
15+
1216
/**
1317
* See: https://db.in.tum.de/~leis/papers/ART.pdf for ART, a CPU-cache-friendly main-memory data structure.
1418
* In our case, the LeafNode's key is always 48 bits long. The high 48-bit keys here are compared
@@ -365,24 +369,8 @@ public LeafNode last() {
365369
return getExtremeLeaf(true);
366370
}
367371

368-
public void serializeArt(DataOutput dataOutput) throws IOException {
369-
dataOutput.writeLong(Long.reverseBytes(keySize));
370-
serialize(root, dataOutput);
371-
}
372-
373-
public void deserializeArt(DataInput dataInput) throws IOException {
374-
keySize = Long.reverseBytes(dataInput.readLong());
375-
root = deserialize(dataInput);
376-
}
377-
378-
public void serializeArt(ByteBuffer byteBuffer) throws IOException {
379-
byteBuffer.putLong(keySize);
380-
serialize(root, byteBuffer);
381-
}
382-
383-
public void deserializeArt(ByteBuffer byteBuffer) throws IOException {
384-
keySize = byteBuffer.getLong();
385-
root = deserialize(byteBuffer);
372+
public long getKeySize() {
373+
return keySize;
386374
}
387375

388376
public LeafNodeIterator leafNodeIterator(boolean reverse, Containers containers) {
@@ -393,116 +381,51 @@ public LeafNodeIterator leafNodeIteratorFrom(long bound, boolean reverse, Contai
393381
return new LeafNodeIterator(this, reverse, containers, bound);
394382
}
395383

396-
private void serialize(Node node, DataOutput dataOutput) throws IOException {
397-
if (node instanceof BranchNode) {
398-
BranchNode branchNode = (BranchNode)node;
399-
// serialize the internal node itself first
400-
branchNode.serialize(dataOutput);
401-
// then all the internal node's children
402-
int nexPos = branchNode.getNextLargerPos(BranchNode.ILLEGAL_IDX);
403-
while (nexPos != BranchNode.ILLEGAL_IDX) {
404-
// serialize all the not null child node
405-
Node child = branchNode.getChild(nexPos);
406-
serialize(child, dataOutput);
407-
nexPos = branchNode.getNextLargerPos(nexPos);
408-
}
409-
} else {
410-
// serialize the leaf node
411-
node.serialize(dataOutput);
384+
public void serializeArt(DataOutput dataOutput, HighLowContainer highLow) throws IOException {
385+
dataOutput.writeLong(Long.reverseBytes(keySize));
386+
if (keySize != 0L) {
387+
root.serialize(dataOutput, highLow);
412388
}
413389
}
414-
415-
private void serialize(Node node, ByteBuffer byteBuffer) throws IOException {
416-
if (node instanceof BranchNode) {
417-
BranchNode branchNode = (BranchNode)node;
418-
// serialize the internal node itself first
419-
branchNode.serialize(byteBuffer);
420-
// then all the internal node's children
421-
int nexPos = branchNode.getNextLargerPos(BranchNode.ILLEGAL_IDX);
422-
while (nexPos != BranchNode.ILLEGAL_IDX) {
423-
// serialize all the not null child node
424-
Node child = branchNode.getChild(nexPos);
425-
serialize(child, byteBuffer);
426-
nexPos = branchNode.getNextLargerPos(nexPos);
390+
public void serializeArt(ByteBuffer buffer, HighLowContainer highLow) throws IOException {
391+
ByteOrder originalOrder = buffer.order();
392+
buffer.order(LITTLE_ENDIAN);
393+
try {
394+
buffer.putLong(keySize);
395+
if (keySize != 0L) {
396+
root.serialize(buffer, highLow);
427397
}
428-
} else {
429-
// serialize the leaf node
430-
node.serialize(byteBuffer);
398+
} finally {
399+
buffer.order(originalOrder);
431400
}
432401
}
433402

434-
private Node deserialize(DataInput dataInput) throws IOException {
435-
Node oneNode = Node.deserialize(dataInput);
436-
if (oneNode == null) {
437-
return null;
438-
}
439-
if (oneNode instanceof LeafNode) {
440-
return oneNode;
441-
} else {
442-
BranchNode branch = (BranchNode) oneNode;
443-
// internal node
444-
int count = branch.count;
445-
// all the not null child nodes
446-
Node[] children = new Node[count];
447-
for (int i = 0; i < count; i++) {
448-
Node child = deserialize(dataInput);
449-
children[i] = child;
450-
}
451-
branch.replaceChildren(children);
452-
return branch;
403+
public void deserializeArt(DataInput dataInput, HighLowContainer highLow) throws IOException {
404+
keySize = Long.reverseBytes(dataInput.readLong());
405+
if (keySize != 0L) {
406+
root = Node.deserialize(dataInput, highLow);
453407
}
454408
}
455409

456-
private Node deserialize(ByteBuffer byteBuffer) throws IOException {
457-
Node oneNode = Node.deserialize(byteBuffer);
458-
if (oneNode == null) {
459-
return null;
460-
}
461-
if (oneNode instanceof LeafNode) {
462-
return oneNode;
463-
} else {
464-
BranchNode branchNode = (BranchNode) oneNode;
465-
// internal node
466-
int count = branchNode.count;
467-
// all the not null child nodes
468-
Node[] children = new Node[count];
469-
for (int i = 0; i < count; i++) {
470-
Node child = deserialize(byteBuffer);
471-
children[i] = child;
410+
public void deserializeArt(ByteBuffer buffer, HighLowContainer highLow) throws IOException {
411+
ByteOrder originalOrder = buffer.order();
412+
buffer.order(LITTLE_ENDIAN);
413+
try {
414+
keySize = buffer.getLong();
415+
if (keySize != 0L) {
416+
root = Node.deserialize(buffer, highLow);
472417
}
473-
branchNode.replaceChildren(children);
474-
return branchNode;
418+
} finally {
419+
buffer.order(originalOrder);
475420
}
476421
}
477422

478-
public long serializeSizeInBytes() {
479-
return serializeSizeInBytes(this.root) + 8;
480-
}
481-
482-
public long getKeySize() {
483-
return keySize;
484-
}
485-
486-
private long serializeSizeInBytes(Node node) {
487-
if (node instanceof BranchNode) {
488-
BranchNode branchNode = (BranchNode) node;
489-
// serialize the internal node itself first
490-
int currentNodeSize = branchNode.serializeSizeInBytes();
491-
// then all the internal node's children
492-
long childrenTotalSize = 0L;
493-
int nexPos = branchNode.getNextLargerPos(BranchNode.ILLEGAL_IDX);
494-
while (nexPos != BranchNode.ILLEGAL_IDX) {
495-
// serialize all the not null child node
496-
Node child = branchNode.getChild(nexPos);
497-
long childSize = serializeSizeInBytes(child);
498-
nexPos = branchNode.getNextLargerPos(nexPos);
499-
childrenTotalSize += childSize;
500-
}
501-
return currentNodeSize + childrenTotalSize;
502-
} else {
503-
// serialize the leaf node
504-
int nodeSize = node.serializeSizeInBytes();
505-
return nodeSize;
423+
public long serializeSizeInBytes(HighLowContainer highLow) {
424+
long size = 8; // 8 bytes for the keySize
425+
if (!isEmpty()) {
426+
size += root.serializeSizeInBytes(highLow);
506427
}
428+
return size;
507429
}
430+
508431
}

0 commit comments

Comments
 (0)