Skip to content

Commit eabd9b1

Browse files
committed
Merge branch 'main' into Fix_get_file_bytes
2 parents e92d5c8 + f5d261a commit eabd9b1

3 files changed

Lines changed: 129 additions & 2 deletions

File tree

src/main/java/org/apache/datasketches/sampling/VarOptItemsSketch.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -827,8 +827,8 @@ void update(final T item, final double weight, final boolean mark) {
827827
if (item == null) {
828828
return;
829829
}
830-
if (weight <= 0.0) {
831-
throw new SketchesArgumentException("Item weights must be strictly positive: "
830+
if (weight <= 0.0 || Double.isNaN(weight) || Double.isInfinite(weight)) {
831+
throw new SketchesArgumentException("Item weights must be strictly positive and finite number: "
832832
+ weight + ", for item " + item.toString());
833833
}
834834
++n_;

src/test/java/org/apache/datasketches/sampling/VarOptItemsSketchTest.java

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -264,6 +264,18 @@ public void checkInvalidWeight() {
264264
vis.update("invalidWeight", -1.0); // should fail
265265
}
266266

267+
@Test(expectedExceptions = SketchesArgumentException.class)
268+
public void checkNaNWeight() {
269+
final VarOptItemsSketch<String> vis = VarOptItemsSketch.newInstance(5);
270+
vis.update("invalidWeight", Double.NaN);
271+
}
272+
273+
@Test(expectedExceptions = SketchesArgumentException.class)
274+
public void checkInfiniteWeight() {
275+
final VarOptItemsSketch<String> vis = VarOptItemsSketch.newInstance(5);
276+
vis.update("invalidWeight", Double.POSITIVE_INFINITY);
277+
}
278+
267279
@Test
268280
public void checkCorruptSerializedWeight() {
269281
final VarOptItemsSketch<String> vis = VarOptItemsSketch.newInstance(24);

src/test/java/org/apache/datasketches/tuple/strings/AosSketchCrossLanguageTest.java

Lines changed: 115 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,14 +19,25 @@
1919

2020
package org.apache.datasketches.tuple.strings;
2121

22+
import static org.apache.datasketches.common.TestUtil.CHECK_CPP_FILES;
2223
import static org.apache.datasketches.common.TestUtil.GENERATE_JAVA_FILES;
24+
import static org.apache.datasketches.common.TestUtil.cppPath;
2325
import static org.apache.datasketches.common.TestUtil.putBytesToJavaPath;
2426
import static org.testng.Assert.assertEquals;
2527
import static org.testng.Assert.assertFalse;
28+
import static org.testng.Assert.assertTrue;
2629

2730
import java.io.IOException;
31+
import java.lang.foreign.MemorySegment;
32+
import java.nio.file.Files;
33+
import java.util.Arrays;
34+
import java.util.HashSet;
35+
import java.util.List;
36+
import java.util.Set;
2837

2938
import org.apache.datasketches.common.ResizeFactor;
39+
import org.apache.datasketches.tuple.TupleSketch;
40+
import org.apache.datasketches.tuple.TupleSketchIterator;
3041
import org.testng.annotations.Test;
3142

3243
/**
@@ -108,4 +119,108 @@ public void generateBinariesForCompatibilityTestingEmptyStrings() throws IOExcep
108119

109120
putBytesToJavaPath("aos_empty_strings_java.sk", sk.compact().toByteArray());
110121
}
122+
123+
@Test(groups = {CHECK_CPP_FILES})
124+
public void deserializeFromCppOneString() throws IOException {
125+
final int[] nArr = {0, 1, 10, 100, 1000, 10_000, 100_000, 1_000_000};
126+
for (int n : nArr) {
127+
final byte[] bytes = Files.readAllBytes(cppPath.resolve("aos_1_n" + n + "_cpp.sk"));
128+
final TupleSketch<ArrayOfStringsSummary> sketch = ArrayOfStringsTupleSketch.heapifySketch(MemorySegment.ofArray(bytes), new ArrayOfStringsSummaryDeserializer());
129+
assertTrue(n == 0 ? sketch.isEmpty() : !sketch.isEmpty());
130+
assertEquals(sketch.getEstimate(), n, n * 0.03);
131+
assertTrue(n > 1000? sketch.isEstimationMode() : !sketch.isEstimationMode());
132+
133+
final TupleSketchIterator<ArrayOfStringsSummary> it = sketch.iterator();
134+
while (it.next()) {
135+
assertTrue(it.getHash() < sketch.getThetaLong());
136+
final String[] summary = it.getSummary().getValue();
137+
assertEquals(summary.length, 1);
138+
}
139+
}
140+
}
141+
142+
@Test(groups = {CHECK_CPP_FILES})
143+
public void deserializeFromCppThreeStrings() throws IOException {
144+
final int[] nArr = {0, 1, 10, 100, 1000, 10_000, 100_000, 1_000_000};
145+
for (int n : nArr) {
146+
final byte[] bytes = Files.readAllBytes(cppPath.resolve("aos_3_n" + n + "_cpp.sk"));
147+
final TupleSketch<ArrayOfStringsSummary> sketch = ArrayOfStringsTupleSketch.heapifySketch(MemorySegment.ofArray(bytes), new ArrayOfStringsSummaryDeserializer());
148+
assertTrue(n == 0 ? sketch.isEmpty() : !sketch.isEmpty());
149+
assertEquals(sketch.getEstimate(), n, n * 0.03);
150+
assertTrue(n > 1000? sketch.isEstimationMode() : !sketch.isEstimationMode());
151+
152+
final TupleSketchIterator<ArrayOfStringsSummary> it = sketch.iterator();
153+
while (it.next()) {
154+
assertTrue(it.getHash() < sketch.getThetaLong());
155+
final String[] summary = it.getSummary().getValue();
156+
assertEquals(summary.length, 3);
157+
}
158+
}
159+
}
160+
161+
@Test(groups = {CHECK_CPP_FILES})
162+
public void deserializeFromCppOneStringNonEmptyNoEntries() throws IOException {
163+
final byte[] bytes = Files.readAllBytes(cppPath.resolve("aos_1_non_empty_no_entries_cpp.sk"));
164+
final TupleSketch<ArrayOfStringsSummary> sketch = ArrayOfStringsTupleSketch.heapifySketch(MemorySegment.ofArray(bytes), new ArrayOfStringsSummaryDeserializer());
165+
166+
assertFalse(sketch.isEmpty());
167+
assertEquals(sketch.getRetainedEntries(), 0);
168+
}
169+
170+
@Test(groups = {CHECK_CPP_FILES})
171+
public void deserializeFromCppMultiKeyStrings() throws IOException {
172+
final int[] nArr = {0, 1, 10, 100, 1000, 10_000, 100_000, 1_000_000};
173+
for (int n : nArr) {
174+
final byte[] bytes = Files.readAllBytes(cppPath.resolve("aos_multikey_n" + n + "_cpp.sk"));
175+
final TupleSketch<ArrayOfStringsSummary> sketch = ArrayOfStringsTupleSketch.heapifySketch(MemorySegment.ofArray(bytes), new ArrayOfStringsSummaryDeserializer());
176+
assertTrue(n == 0 ? sketch.isEmpty() : !sketch.isEmpty());
177+
assertEquals(sketch.getEstimate(), n, n * 0.03);
178+
assertTrue(n > 1000? sketch.isEstimationMode() : !sketch.isEstimationMode());
179+
180+
final TupleSketchIterator<ArrayOfStringsSummary> it = sketch.iterator();
181+
while (it.next()) {
182+
assertTrue(it.getHash() < sketch.getThetaLong());
183+
final String[] summary = it.getSummary().getValue();
184+
assertEquals(summary.length, 1);
185+
}
186+
}
187+
}
188+
189+
@Test(groups = {CHECK_CPP_FILES})
190+
public void deserializeFromCppUnicodeStrings() throws IOException {
191+
final byte[] bytes = Files.readAllBytes(cppPath.resolve("aos_unicode_cpp.sk"));
192+
final TupleSketch<ArrayOfStringsSummary> sketch = ArrayOfStringsTupleSketch.heapifySketch(MemorySegment.ofArray(bytes), new ArrayOfStringsSummaryDeserializer());
193+
assertFalse(sketch.isEmpty());
194+
assertFalse(sketch.isEstimationMode());
195+
assertEquals(sketch.getEstimate(), 3.0);
196+
197+
final Set<List<String>> summaries = getSummaries(sketch);
198+
assertTrue(summaries.contains(Arrays.asList("밸류", "값")));
199+
assertTrue(summaries.contains(Arrays.asList("📦", "🎁")));
200+
assertTrue(summaries.contains(Arrays.asList("ценить1", "ценить2")));
201+
}
202+
203+
@Test(groups = {CHECK_CPP_FILES})
204+
public void deserializeFromCppEmptyStrings() throws IOException {
205+
final byte[] bytes = Files.readAllBytes(cppPath.resolve("aos_empty_strings_cpp.sk"));
206+
final TupleSketch<ArrayOfStringsSummary> sketch = ArrayOfStringsTupleSketch.heapifySketch(MemorySegment.ofArray(bytes), new ArrayOfStringsSummaryDeserializer());
207+
assertFalse(sketch.isEmpty());
208+
assertFalse(sketch.isEstimationMode());
209+
assertEquals(sketch.getEstimate(), 3.0);
210+
211+
final Set<List<String>> summaries = getSummaries(sketch);
212+
assertTrue(summaries.contains(Arrays.asList("empty_key_value")));
213+
assertTrue(summaries.contains(Arrays.asList("")));
214+
assertTrue(summaries.contains(Arrays.asList("", "")));
215+
}
216+
217+
private static Set<List<String>> getSummaries(final TupleSketch<ArrayOfStringsSummary> sketch) {
218+
final Set<List<String>> summaries = new HashSet<>();
219+
final TupleSketchIterator<ArrayOfStringsSummary> it = sketch.iterator();
220+
while (it.next()) {
221+
assertTrue(it.getHash() < sketch.getThetaLong());
222+
summaries.add(Arrays.asList(it.getSummary().getValue()));
223+
}
224+
return summaries;
225+
}
111226
}

0 commit comments

Comments
 (0)