Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 18 additions & 18 deletions src/main/java/org/apache/datasketches/count/CountMinSketch.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@
import static java.lang.foreign.ValueLayout.JAVA_BYTE;
import java.util.Random;

import static org.apache.datasketches.common.SpecialValueLayouts.JAVA_INT_UNALIGNED_BIG_ENDIAN;
import static org.apache.datasketches.common.SpecialValueLayouts.JAVA_LONG_UNALIGNED_BIG_ENDIAN;
import static org.apache.datasketches.common.SpecialValueLayouts.JAVA_SHORT_UNALIGNED_BIG_ENDIAN;
import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED;
import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED;


public class CountMinSketch {
Expand Down Expand Up @@ -113,7 +113,7 @@ int mask() {
*/
private static byte[] longToBytes(final long value) {
final MemorySegment segment = LONG_SEGMENT.get();
segment.set(JAVA_LONG_UNALIGNED_BIG_ENDIAN, 0, value);
segment.set(JAVA_LONG_UNALIGNED, 0, value);
return segment.toArray(JAVA_BYTE);
}

Expand Down Expand Up @@ -393,9 +393,9 @@ private int getSerializedSizeBytes() {
public byte[] toByteArray() {
final int serializedSizeBytes = getSerializedSizeBytes();
final MemorySegment wseg = MemorySegment.ofArray(new byte[serializedSizeBytes]);

long offset = 0;

// Long 0
final int preambleLongs = Family.COUNTMIN.getMinPreLongs();
wseg.set(JAVA_BYTE, offset++, (byte) preambleLongs);
Expand All @@ -406,31 +406,31 @@ public byte[] toByteArray() {
final int flagsByte = isEmpty() ? Flag.IS_EMPTY.mask() : 0;
wseg.set(JAVA_BYTE, offset++, (byte) flagsByte);
final int NULL_32 = 0;
wseg.set(JAVA_INT_UNALIGNED_BIG_ENDIAN, offset, NULL_32);
wseg.set(JAVA_INT_UNALIGNED, offset, NULL_32);
offset += 4;
Copy link
Copy Markdown
Member

@leerho leerho Jul 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This (and following) is a use-case where the new datasketches.common.positional.PositionalSegment could be used. You might want to look at it. I used the term "positional" instead of "buffer", because "buffer" is a widely used generic term and doesn't convey what is actually going on. The PS would eliminate all of the offset tracking that you had to do by hand (with magic numbers).

I had already migrated theta and tuple, but when I got to bloomfilter, I decided there was a real need for it -- so I paused and created it. It is already used in bloomfilter, tdigest, kll, and req. I intend to go back and use it in the rest of the library where it makes sense. And here it makes perfect sense, but this suggestion is optional.

Nonetheless, if you decide not to use PS, I would recommend not using magic numbers for the offset adjustments and use the build-in static constants such as Integer.BYTES, Long.BYTES, etc. The code will become more obvious as to what you are doing and more robust. :-)

Again, thank you for your work on this!


// Long 1
wseg.set(JAVA_INT_UNALIGNED_BIG_ENDIAN, offset, numBuckets_);
wseg.set(JAVA_INT_UNALIGNED, offset, numBuckets_);
offset += 4;
wseg.set(JAVA_BYTE, offset++, numHashes_);
short hashSeed = Util.computeSeedHash(seed_);
wseg.set(JAVA_SHORT_UNALIGNED_BIG_ENDIAN, offset, hashSeed);
wseg.set(JAVA_SHORT_UNALIGNED, offset, hashSeed);
offset += 2;
final byte NULL_8 = 0;
wseg.set(JAVA_BYTE, offset++, NULL_8);

if (isEmpty()) {
return wseg.toArray(JAVA_BYTE);
}

wseg.set(JAVA_LONG_UNALIGNED_BIG_ENDIAN, offset, totalWeight_);
wseg.set(JAVA_LONG_UNALIGNED, offset, totalWeight_);
offset += 8;

for (long w: sketchArray_) {
wseg.set(JAVA_LONG_UNALIGNED_BIG_ENDIAN, offset, w);
wseg.set(JAVA_LONG_UNALIGNED, offset, w);
offset += 8;
}

return wseg.toArray(JAVA_BYTE);
}

Expand All @@ -448,13 +448,13 @@ public static CountMinSketch deserialize(final byte[] b, final long seed) {
final byte serialVersion = buf.get(JAVA_BYTE, offset++);
final byte familyId = buf.get(JAVA_BYTE, offset++);
final byte flagsByte = buf.get(JAVA_BYTE, offset++);
final int NULL_32 = buf.get(JAVA_INT_UNALIGNED_BIG_ENDIAN, offset);
final int NULL_32 = buf.get(JAVA_INT_UNALIGNED, offset);
offset += 4;

final int numBuckets = buf.get(JAVA_INT_UNALIGNED_BIG_ENDIAN, offset);
final int numBuckets = buf.get(JAVA_INT_UNALIGNED, offset);
offset += 4;
final byte numHashes = buf.get(JAVA_BYTE, offset++);
final short seedHash = buf.get(JAVA_SHORT_UNALIGNED_BIG_ENDIAN, offset);
final short seedHash = buf.get(JAVA_SHORT_UNALIGNED, offset);
offset += 2;
final byte NULL_8 = buf.get(JAVA_BYTE, offset++);

Comment thread
freakyzoidberg marked this conversation as resolved.
Expand All @@ -468,12 +468,12 @@ public static CountMinSketch deserialize(final byte[] b, final long seed) {
if (empty) {
return cms;
}
long w = buf.get(JAVA_LONG_UNALIGNED_BIG_ENDIAN, offset);
long w = buf.get(JAVA_LONG_UNALIGNED, offset);
offset += 8;
cms.totalWeight_ = w;

for (int i = 0; i < cms.sketchArray_.length; i++) {
cms.sketchArray_[i] = buf.get(JAVA_LONG_UNALIGNED_BIG_ENDIAN, offset);
cms.sketchArray_[i] = buf.get(JAVA_LONG_UNALIGNED, offset);
offset += 8;
}

Expand Down