|
19 | 19 |
|
20 | 20 | package org.apache.datasketches.tuple.strings; |
21 | 21 |
|
| 22 | +import static org.apache.datasketches.common.TestUtil.CHECK_CPP_FILES; |
22 | 23 | import static org.apache.datasketches.common.TestUtil.GENERATE_JAVA_FILES; |
| 24 | +import static org.apache.datasketches.common.TestUtil.cppPath; |
23 | 25 | import static org.apache.datasketches.common.TestUtil.putBytesToJavaPath; |
24 | 26 | import static org.testng.Assert.assertEquals; |
25 | 27 | import static org.testng.Assert.assertFalse; |
| 28 | +import static org.testng.Assert.assertTrue; |
26 | 29 |
|
27 | 30 | import java.io.IOException; |
| 31 | +import java.lang.foreign.MemorySegment; |
| 32 | +import java.nio.file.Files; |
| 33 | +import java.util.Arrays; |
| 34 | +import java.util.HashSet; |
| 35 | +import java.util.List; |
| 36 | +import java.util.Set; |
28 | 37 |
|
29 | 38 | import org.apache.datasketches.common.ResizeFactor; |
| 39 | +import org.apache.datasketches.tuple.TupleSketch; |
| 40 | +import org.apache.datasketches.tuple.TupleSketchIterator; |
30 | 41 | import org.testng.annotations.Test; |
31 | 42 |
|
32 | 43 | /** |
@@ -108,4 +119,108 @@ public void generateBinariesForCompatibilityTestingEmptyStrings() throws IOExcep |
108 | 119 |
|
109 | 120 | putBytesToJavaPath("aos_empty_strings_java.sk", sk.compact().toByteArray()); |
110 | 121 | } |
| 122 | + |
| 123 | + @Test(groups = {CHECK_CPP_FILES}) |
| 124 | + public void deserializeFromCppOneString() throws IOException { |
| 125 | + final int[] nArr = {0, 1, 10, 100, 1000, 10_000, 100_000, 1_000_000}; |
| 126 | + for (int n : nArr) { |
| 127 | + final byte[] bytes = Files.readAllBytes(cppPath.resolve("aos_1_n" + n + "_cpp.sk")); |
| 128 | + final TupleSketch<ArrayOfStringsSummary> sketch = ArrayOfStringsTupleSketch.heapifySketch(MemorySegment.ofArray(bytes), new ArrayOfStringsSummaryDeserializer()); |
| 129 | + assertTrue(n == 0 ? sketch.isEmpty() : !sketch.isEmpty()); |
| 130 | + assertEquals(sketch.getEstimate(), n, n * 0.03); |
| 131 | + assertTrue(n > 1000? sketch.isEstimationMode() : !sketch.isEstimationMode()); |
| 132 | + |
| 133 | + final TupleSketchIterator<ArrayOfStringsSummary> it = sketch.iterator(); |
| 134 | + while (it.next()) { |
| 135 | + assertTrue(it.getHash() < sketch.getThetaLong()); |
| 136 | + final String[] summary = it.getSummary().getValue(); |
| 137 | + assertEquals(summary.length, 1); |
| 138 | + } |
| 139 | + } |
| 140 | + } |
| 141 | + |
| 142 | + @Test(groups = {CHECK_CPP_FILES}) |
| 143 | + public void deserializeFromCppThreeStrings() throws IOException { |
| 144 | + final int[] nArr = {0, 1, 10, 100, 1000, 10_000, 100_000, 1_000_000}; |
| 145 | + for (int n : nArr) { |
| 146 | + final byte[] bytes = Files.readAllBytes(cppPath.resolve("aos_3_n" + n + "_cpp.sk")); |
| 147 | + final TupleSketch<ArrayOfStringsSummary> sketch = ArrayOfStringsTupleSketch.heapifySketch(MemorySegment.ofArray(bytes), new ArrayOfStringsSummaryDeserializer()); |
| 148 | + assertTrue(n == 0 ? sketch.isEmpty() : !sketch.isEmpty()); |
| 149 | + assertEquals(sketch.getEstimate(), n, n * 0.03); |
| 150 | + assertTrue(n > 1000? sketch.isEstimationMode() : !sketch.isEstimationMode()); |
| 151 | + |
| 152 | + final TupleSketchIterator<ArrayOfStringsSummary> it = sketch.iterator(); |
| 153 | + while (it.next()) { |
| 154 | + assertTrue(it.getHash() < sketch.getThetaLong()); |
| 155 | + final String[] summary = it.getSummary().getValue(); |
| 156 | + assertEquals(summary.length, 3); |
| 157 | + } |
| 158 | + } |
| 159 | + } |
| 160 | + |
| 161 | + @Test(groups = {CHECK_CPP_FILES}) |
| 162 | + public void deserializeFromCppOneStringNonEmptyNoEntries() throws IOException { |
| 163 | + final byte[] bytes = Files.readAllBytes(cppPath.resolve("aos_1_non_empty_no_entries_cpp.sk")); |
| 164 | + final TupleSketch<ArrayOfStringsSummary> sketch = ArrayOfStringsTupleSketch.heapifySketch(MemorySegment.ofArray(bytes), new ArrayOfStringsSummaryDeserializer()); |
| 165 | + |
| 166 | + assertFalse(sketch.isEmpty()); |
| 167 | + assertEquals(sketch.getRetainedEntries(), 0); |
| 168 | + } |
| 169 | + |
| 170 | + @Test(groups = {CHECK_CPP_FILES}) |
| 171 | + public void deserializeFromCppMultiKeyStrings() throws IOException { |
| 172 | + final int[] nArr = {0, 1, 10, 100, 1000, 10_000, 100_000, 1_000_000}; |
| 173 | + for (int n : nArr) { |
| 174 | + final byte[] bytes = Files.readAllBytes(cppPath.resolve("aos_multikey_n" + n + "_cpp.sk")); |
| 175 | + final TupleSketch<ArrayOfStringsSummary> sketch = ArrayOfStringsTupleSketch.heapifySketch(MemorySegment.ofArray(bytes), new ArrayOfStringsSummaryDeserializer()); |
| 176 | + assertTrue(n == 0 ? sketch.isEmpty() : !sketch.isEmpty()); |
| 177 | + assertEquals(sketch.getEstimate(), n, n * 0.03); |
| 178 | + assertTrue(n > 1000? sketch.isEstimationMode() : !sketch.isEstimationMode()); |
| 179 | + |
| 180 | + final TupleSketchIterator<ArrayOfStringsSummary> it = sketch.iterator(); |
| 181 | + while (it.next()) { |
| 182 | + assertTrue(it.getHash() < sketch.getThetaLong()); |
| 183 | + final String[] summary = it.getSummary().getValue(); |
| 184 | + assertEquals(summary.length, 1); |
| 185 | + } |
| 186 | + } |
| 187 | + } |
| 188 | + |
| 189 | + @Test(groups = {CHECK_CPP_FILES}) |
| 190 | + public void deserializeFromCppUnicodeStrings() throws IOException { |
| 191 | + final byte[] bytes = Files.readAllBytes(cppPath.resolve("aos_unicode_cpp.sk")); |
| 192 | + final TupleSketch<ArrayOfStringsSummary> sketch = ArrayOfStringsTupleSketch.heapifySketch(MemorySegment.ofArray(bytes), new ArrayOfStringsSummaryDeserializer()); |
| 193 | + assertFalse(sketch.isEmpty()); |
| 194 | + assertFalse(sketch.isEstimationMode()); |
| 195 | + assertEquals(sketch.getEstimate(), 3.0); |
| 196 | + |
| 197 | + final Set<List<String>> summaries = getSummaries(sketch); |
| 198 | + assertTrue(summaries.contains(Arrays.asList("밸류", "값"))); |
| 199 | + assertTrue(summaries.contains(Arrays.asList("📦", "🎁"))); |
| 200 | + assertTrue(summaries.contains(Arrays.asList("ценить1", "ценить2"))); |
| 201 | + } |
| 202 | + |
| 203 | + @Test(groups = {CHECK_CPP_FILES}) |
| 204 | + public void deserializeFromCppEmptyStrings() throws IOException { |
| 205 | + final byte[] bytes = Files.readAllBytes(cppPath.resolve("aos_empty_strings_cpp.sk")); |
| 206 | + final TupleSketch<ArrayOfStringsSummary> sketch = ArrayOfStringsTupleSketch.heapifySketch(MemorySegment.ofArray(bytes), new ArrayOfStringsSummaryDeserializer()); |
| 207 | + assertFalse(sketch.isEmpty()); |
| 208 | + assertFalse(sketch.isEstimationMode()); |
| 209 | + assertEquals(sketch.getEstimate(), 3.0); |
| 210 | + |
| 211 | + final Set<List<String>> summaries = getSummaries(sketch); |
| 212 | + assertTrue(summaries.contains(Arrays.asList("empty_key_value"))); |
| 213 | + assertTrue(summaries.contains(Arrays.asList(""))); |
| 214 | + assertTrue(summaries.contains(Arrays.asList("", ""))); |
| 215 | + } |
| 216 | + |
| 217 | + private static Set<List<String>> getSummaries(final TupleSketch<ArrayOfStringsSummary> sketch) { |
| 218 | + final Set<List<String>> summaries = new HashSet<>(); |
| 219 | + final TupleSketchIterator<ArrayOfStringsSummary> it = sketch.iterator(); |
| 220 | + while (it.next()) { |
| 221 | + assertTrue(it.getHash() < sketch.getThetaLong()); |
| 222 | + summaries.add(Arrays.asList(it.getSummary().getValue())); |
| 223 | + } |
| 224 | + return summaries; |
| 225 | + } |
111 | 226 | } |
0 commit comments