Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions src/main/java/org/apache/datasketches/req/ReqSerDe.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
package org.apache.datasketches.req;

import static java.lang.Math.max;
import static java.lang.Math.min;
import static java.lang.Math.round;

import java.lang.foreign.MemorySegment;
Expand Down Expand Up @@ -204,11 +203,17 @@ static final Compactor extractCompactor(final PositionalSegment posSeg, final bo
final int count = posSeg.getInt();
final float[] arr = new float[count];
posSeg.getFloatArray(arr, 0, count);
float minItem = Float.MAX_VALUE;
float maxItem = Float.MIN_VALUE;
float minItem = Float.NaN;
float maxItem = Float.NaN;
for (int i = 0; i < count; i++) {
minItem = min(minItem, arr[i]);
maxItem = max(maxItem, arr[i]);
final float item = arr[i];
if (Float.isNaN(minItem)) {
minItem = item;
maxItem = item;
} else {
if (item < minItem) { minItem = item; }
if (item > maxItem) { maxItem = item; }
}
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have taken a closer look at this, and what you have here is not correct either. If a NaN should happen to appear, it would reset both the minItem and maxItem values to NaN, erasing the tracked minItem and maxItem values.

This extractCompactor is part of the deserialization process, and since NaNs are rejected during sketch construction, they should not appear in the serialization data. Nonetheless, this section of code is just trying to reconstruct the min and max values of this specific compactor.

These specific min and max values here are only used in a sketch that is in EXACT format, which has a single compactor. The min and max values are not stored in the serialization structure itself and must be derived dynamically from the compactor. This keeps the overall storage small for small data streams.

Note that in the normal, ESTIMATION format the min and max values are stored in the serialization structure itself, so the min and max values being computed here are not used. (This means the error you observed where it looses negative values probably occurred with a very small data set.)

I think we can trust that the values being examined from the serialized data structure do not contain NaNs. This means that to correct the original error, all we need to do is correct the initialization of the min and max values to +/- infinity. So the corrected section should look like the following, which will preserve any infinities if they exist:

    posSeg.getFloatArray(arr, 0, count);
    float minItem = Float.POSITIVE_INFINITY;   // changed line
    float maxItem = Float.NEGATIVE_INFINITY; // changed line 
    for (int i = 0; i < count; i++) {
      minItem = min(minItem, arr[i]);
      maxItem = max(maxItem, arr[i]);
    }

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed it to follow the code.

}
final int delta = 2 * sectionSize * numSections;
final int nomCap = 2 * delta;
Expand Down
65 changes: 65 additions & 0 deletions src/test/java/org/apache/datasketches/req/ReqCompactorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -101,4 +101,69 @@ private static void checkSerDeImpl(final int k, final boolean hra) {
assertEquals(fbuf2.getDelta(), delta);
assertTrue(fbuf.isEqualTo(fbuf2));
}

@Test
public void checkSerDeWithNegativeValues() {
checkSerDeNegativeImpl(12, false);
checkSerDeNegativeImpl(12, true);
}

private static void checkSerDeNegativeImpl(final int k, final boolean hra) {
final ReqCompactor c1 = new ReqCompactor((byte)0, hra, k, null);
final int nomCap = 2 * 3 * k;
final FloatBuffer fbuf = c1.getBuffer();

for (int i = 1; i <= nomCap; i++) {
fbuf.append(-i); //all negative values
}
final byte[] c1ser = c1.toByteArray();
final PositionalSegment posSeg = PositionalSegment.wrap(MemorySegment.ofArray(c1ser));
final Compactor compactor = ReqSerDe.extractCompactor(posSeg, fbuf.isSorted(), hra);
assertEquals(compactor.minItem, -nomCap);
assertEquals(compactor.maxItem, -1f);
}

@Test
public void checkSerDeWithMixedValues() {
checkSerDeMixedImpl(12, false);
checkSerDeMixedImpl(12, true);
}

private static void checkSerDeMixedImpl(final int k, final boolean hra) {
final ReqCompactor c1 = new ReqCompactor((byte)0, hra, k, null);
final int nomCap = 2 * 3 * k;
final int half = nomCap / 2;
final FloatBuffer fbuf = c1.getBuffer();

for (int i = 0; i < nomCap; i++) {
fbuf.append(i - half); // range: -half to half-1
}
final byte[] c1ser = c1.toByteArray();
final PositionalSegment posSeg = PositionalSegment.wrap(MemorySegment.ofArray(c1ser));
final Compactor compactor = ReqSerDe.extractCompactor(posSeg, fbuf.isSorted(), hra);
assertEquals(compactor.minItem, (float) -half);
assertEquals(compactor.maxItem, (float) (half - 1));
}

@Test
public void checkSerDeWithInfiniteValues() {
checkSerDeInfiniteImpl(12, false);
checkSerDeInfiniteImpl(12, true);
}

private static void checkSerDeInfiniteImpl(final int k, final boolean hra) {
final ReqCompactor c1 = new ReqCompactor((byte)0, hra, k, null);
final FloatBuffer fbuf = c1.getBuffer();

fbuf.append(Float.POSITIVE_INFINITY);
fbuf.append(1);
fbuf.append(Float.NEGATIVE_INFINITY);
fbuf.append(-1);

final byte[] c1ser = c1.toByteArray();
final PositionalSegment posSeg = PositionalSegment.wrap(MemorySegment.ofArray(c1ser));
final Compactor compactor = ReqSerDe.extractCompactor(posSeg, fbuf.isSorted(), hra);
assertEquals(compactor.minItem, Float.NEGATIVE_INFINITY);
assertEquals(compactor.maxItem, Float.POSITIVE_INFINITY);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,30 @@ public void generateBinariesForCompatibilityTesting() throws IOException {
}
}

@Test(groups = {GENERATE_JAVA_FILES})
public void generateNegativeBinariesForCompatibilityTesting() throws IOException {
final int[] nArr = {1, 10};
for (final int n: nArr) {
final ReqSketch sk = ReqSketch.builder().build();
for (int i = 1; i <= n; i++) {
sk.update(-i);
}
putBytesToJavaPath("req_float_negative_n" + n + "_java.sk", sk.toByteArray());
}
}

@Test(groups = {GENERATE_JAVA_FILES})
public void generateMixedBinariesForCompatibilityTesting() throws IOException {
final int[] nArr = {1, 10};
for (final int n: nArr) {
final ReqSketch sk = ReqSketch.builder().build();
for (int i = -n; i <= n; i++) {
sk.update(i);
}
putBytesToJavaPath("req_float_mixed_n" + n + "_java.sk", sk.toByteArray());
}
}

@Test(groups = {CHECK_CPP_FILES})
public void deserializeFromCpp() throws IOException {
final int[] nArr = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
Expand All @@ -74,5 +98,4 @@ public void deserializeFromCpp() throws IOException {
}
}
}

}
Loading