Skip to content

Commit 714f12e

Browse files
committed
test: add ArrayOfStrings Sketch C++ compat cases
1 parent 53b312e commit 714f12e

1 file changed

Lines changed: 114 additions & 0 deletions

File tree

src/test/java/org/apache/datasketches/tuple/strings/AosSketchCrossLanguageTest.java

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,25 @@
1919

2020
package org.apache.datasketches.tuple.strings;
2121

22+
import static org.apache.datasketches.common.TestUtil.CHECK_CPP_FILES;
2223
import static org.apache.datasketches.common.TestUtil.GENERATE_JAVA_FILES;
24+
import static org.apache.datasketches.common.TestUtil.cppPath;
2325
import static org.apache.datasketches.common.TestUtil.javaPath;
2426
import static org.testng.Assert.assertEquals;
2527
import static org.testng.Assert.assertFalse;
28+
import static org.testng.Assert.assertTrue;
2629

30+
import java.lang.foreign.MemorySegment;
2731
import java.io.IOException;
2832
import java.nio.file.Files;
33+
import java.util.Arrays;
34+
import java.util.HashSet;
35+
import java.util.List;
36+
import java.util.Set;
2937

3038
import org.apache.datasketches.common.ResizeFactor;
39+
import org.apache.datasketches.tuple.TupleSketch;
40+
import org.apache.datasketches.tuple.TupleSketchIterator;
3141
import org.testng.annotations.Test;
3242

3343
/**
@@ -109,4 +119,108 @@ public void generateBinariesForCompatibilityTestingEmptyStrings() throws IOExcep
109119

110120
Files.newOutputStream(javaPath.resolve("aos_empty_strings_java.sk")).write(sk.compact().toByteArray());
111121
}
122+
123+
@Test(groups = {CHECK_CPP_FILES})
124+
public void deserializeFromCppOneString() throws IOException {
125+
final int[] nArr = {0, 1, 10, 100, 1000, 10_000, 100_000, 1_000_000};
126+
for (int n : nArr) {
127+
final byte[] bytes = Files.readAllBytes(cppPath.resolve("aos_1_n" + n + "_cpp.sk"));
128+
final TupleSketch<ArrayOfStringsSummary> sketch = ArrayOfStringsTupleSketch.heapifySketch(MemorySegment.ofArray(bytes), new ArrayOfStringsSummaryDeserializer());
129+
assertTrue(n == 0 ? sketch.isEmpty() : !sketch.isEmpty());
130+
assertEquals(sketch.getEstimate(), n, n * 0.03);
131+
assertTrue(n > 1000? sketch.isEstimationMode() : !sketch.isEstimationMode());
132+
133+
final TupleSketchIterator<ArrayOfStringsSummary> it = sketch.iterator();
134+
while (it.next()) {
135+
assertTrue(it.getHash() < sketch.getThetaLong());
136+
final String[] summary = it.getSummary().getValue();
137+
assertEquals(summary.length, 1);
138+
}
139+
}
140+
}
141+
142+
@Test(groups = {CHECK_CPP_FILES})
143+
public void deserializeFromCppThreeStrings() throws IOException {
144+
final int[] nArr = {0, 1, 10, 100, 1000, 10_000, 100_000, 1_000_000};
145+
for (int n : nArr) {
146+
final byte[] bytes = Files.readAllBytes(cppPath.resolve("aos_3_n" + n + "_cpp.sk"));
147+
final TupleSketch<ArrayOfStringsSummary> sketch = ArrayOfStringsTupleSketch.heapifySketch(MemorySegment.ofArray(bytes), new ArrayOfStringsSummaryDeserializer());
148+
assertTrue(n == 0 ? sketch.isEmpty() : !sketch.isEmpty());
149+
assertEquals(sketch.getEstimate(), n, n * 0.03);
150+
assertTrue(n > 1000? sketch.isEstimationMode() : !sketch.isEstimationMode());
151+
152+
final TupleSketchIterator<ArrayOfStringsSummary> it = sketch.iterator();
153+
while (it.next()) {
154+
assertTrue(it.getHash() < sketch.getThetaLong());
155+
final String[] summary = it.getSummary().getValue();
156+
assertEquals(summary.length, 3);
157+
}
158+
}
159+
}
160+
161+
@Test(groups = {CHECK_CPP_FILES})
162+
public void deserializeFromCppOneStringNonEmptyNoEntries() throws IOException {
163+
final byte[] bytes = Files.readAllBytes(cppPath.resolve("aos_1_non_empty_no_entries_cpp.sk"));
164+
final TupleSketch<ArrayOfStringsSummary> sketch = ArrayOfStringsTupleSketch.heapifySketch(MemorySegment.ofArray(bytes), new ArrayOfStringsSummaryDeserializer());
165+
166+
assertFalse(sketch.isEmpty());
167+
assertEquals(sketch.getRetainedEntries(), 0);
168+
}
169+
170+
@Test(groups = {CHECK_CPP_FILES})
171+
public void deserializeFromCppMultiKeyStrings() throws IOException {
172+
final int[] nArr = {0, 1, 10, 100, 1000, 10_000, 100_000, 1_000_000};
173+
for (int n : nArr) {
174+
final byte[] bytes = Files.readAllBytes(cppPath.resolve("aos_multikey_n" + n + "_cpp.sk"));
175+
final TupleSketch<ArrayOfStringsSummary> sketch = ArrayOfStringsTupleSketch.heapifySketch(MemorySegment.ofArray(bytes), new ArrayOfStringsSummaryDeserializer());
176+
assertTrue(n == 0 ? sketch.isEmpty() : !sketch.isEmpty());
177+
assertEquals(sketch.getEstimate(), n, n * 0.03);
178+
assertTrue(n > 1000? sketch.isEstimationMode() : !sketch.isEstimationMode());
179+
180+
final TupleSketchIterator<ArrayOfStringsSummary> it = sketch.iterator();
181+
while (it.next()) {
182+
assertTrue(it.getHash() < sketch.getThetaLong());
183+
final String[] summary = it.getSummary().getValue();
184+
assertEquals(summary.length, 1);
185+
}
186+
}
187+
}
188+
189+
@Test(groups = {CHECK_CPP_FILES})
190+
public void deserializeFromCppUnicodeStrings() throws IOException {
191+
final byte[] bytes = Files.readAllBytes(cppPath.resolve("aos_unicode_cpp.sk"));
192+
final TupleSketch<ArrayOfStringsSummary> sketch = ArrayOfStringsTupleSketch.heapifySketch(MemorySegment.ofArray(bytes), new ArrayOfStringsSummaryDeserializer());
193+
assertFalse(sketch.isEmpty());
194+
assertFalse(sketch.isEstimationMode());
195+
assertEquals(sketch.getEstimate(), 3.0);
196+
197+
final Set<List<String>> summaries = getSummaries(sketch);
198+
assertTrue(summaries.contains(Arrays.asList("밸류", "값")));
199+
assertTrue(summaries.contains(Arrays.asList("📦", "🎁")));
200+
assertTrue(summaries.contains(Arrays.asList("ценить1", "ценить2")));
201+
}
202+
203+
@Test(groups = {CHECK_CPP_FILES})
204+
public void deserializeFromCppEmptyStrings() throws IOException {
205+
final byte[] bytes = Files.readAllBytes(cppPath.resolve("aos_empty_strings_cpp.sk"));
206+
final TupleSketch<ArrayOfStringsSummary> sketch = ArrayOfStringsTupleSketch.heapifySketch(MemorySegment.ofArray(bytes), new ArrayOfStringsSummaryDeserializer());
207+
assertFalse(sketch.isEmpty());
208+
assertFalse(sketch.isEstimationMode());
209+
assertEquals(sketch.getEstimate(), 3.0);
210+
211+
final Set<List<String>> summaries = getSummaries(sketch);
212+
assertTrue(summaries.contains(Arrays.asList("empty_key_value")));
213+
assertTrue(summaries.contains(Arrays.asList("")));
214+
assertTrue(summaries.contains(Arrays.asList("", "")));
215+
}
216+
217+
private static Set<List<String>> getSummaries(final TupleSketch<ArrayOfStringsSummary> sketch) {
218+
final Set<List<String>> summaries = new HashSet<>();
219+
final TupleSketchIterator<ArrayOfStringsSummary> it = sketch.iterator();
220+
while (it.next()) {
221+
assertTrue(it.getHash() < sketch.getThetaLong());
222+
summaries.add(Arrays.asList(it.getSummary().getValue()));
223+
}
224+
return summaries;
225+
}
112226
}

0 commit comments

Comments
 (0)