Skip to content

Commit 23072ed

Browse files
authored
Merge pull request #716 from proost/test-reservoir-sketch-union-cross-language
test: reservoir sketch union cross language test cases
2 parents b786823 + 3092098 commit 23072ed

File tree

1 file changed

+206
-4
lines changed

1 file changed

+206
-4
lines changed

src/test/java/org/apache/datasketches/sampling/ReservoirCrossLanguageTest.java

Lines changed: 206 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -22,10 +22,12 @@
2222
import org.apache.datasketches.common.ArrayOfDoublesSerDe;
2323
import org.apache.datasketches.common.ArrayOfLongsSerDe;
2424
import org.apache.datasketches.common.ArrayOfStringsSerDe;
25+
import org.apache.datasketches.common.ResizeFactor;
2526
import org.testng.annotations.Test;
2627

2728
import java.io.IOException;
2829
import java.nio.file.Files;
30+
import java.util.ArrayList;
2931

3032
import static org.apache.datasketches.common.TestUtil.GENERATE_JAVA_FILES;
3133
import static org.apache.datasketches.common.TestUtil.javaPath;
@@ -74,7 +76,7 @@ public void generateReservoirLongsSketchSampling() throws IOException {
7476
final ReservoirLongsSketch sk = ReservoirLongsSketch.getInstance(
7577
predeterminedSamples,
7678
n,
77-
org.apache.datasketches.common.ResizeFactor.X8,
79+
ResizeFactor.X8,
7880
k
7981
);
8082

@@ -83,6 +85,56 @@ public void generateReservoirLongsSketchSampling() throws IOException {
8385
}
8486
}
8587

88+
@Test(groups = {GENERATE_JAVA_FILES})
89+
public void generateReservoirLongsUnionEmpty() throws IOException {
90+
int maxK = 128;
91+
ReservoirLongsUnion union = ReservoirLongsUnion.newInstance(maxK);
92+
93+
Files.newOutputStream(javaPath.resolve("reservoir_longs_union_empty_maxk" + maxK + "_java.sk"))
94+
.write(union.toByteArray());
95+
}
96+
97+
@Test(groups = {GENERATE_JAVA_FILES})
98+
public void generateReservoirLongsUnionExact() throws IOException {
99+
int maxK = 128;
100+
int[] nArr = {1, 10, 32, 100, 128};
101+
102+
for (int n : nArr) {
103+
ReservoirLongsUnion union = ReservoirLongsUnion.newInstance(maxK);
104+
for (int i = 0; i < n; i++) {
105+
union.update(i);
106+
}
107+
Files.newOutputStream(javaPath.resolve("reservoir_longs_union_exact_n" + n + "_maxk" + maxK + "_java.sk"))
108+
.write(union.toByteArray());
109+
}
110+
}
111+
112+
@Test(groups = {GENERATE_JAVA_FILES})
113+
public void generateReservoirLongsUnionSampling() throws IOException {
114+
int[] maxKArr = {32, 64, 128};
115+
long n = 1000;
116+
117+
for (int maxK : maxKArr) {
118+
long[] predeterminedSamples = new long[maxK];
119+
for (int i = 0; i < maxK; i++) {
120+
predeterminedSamples[i] = i * 2;
121+
}
122+
123+
ReservoirLongsSketch sk = ReservoirLongsSketch.getInstance(
124+
predeterminedSamples,
125+
n,
126+
ResizeFactor.X8,
127+
maxK
128+
);
129+
130+
ReservoirLongsUnion union = ReservoirLongsUnion.newInstance(maxK);
131+
union.update(sk);
132+
133+
Files.newOutputStream(javaPath.resolve("reservoir_longs_union_sampling_n" + n + "_maxk" + maxK + "_java.sk"))
134+
.write(union.toByteArray());
135+
}
136+
}
137+
86138
@Test(groups = {GENERATE_JAVA_FILES})
87139
public void generateReservoirItemsSketchLongEmpty() throws IOException {
88140
final int k = 128;
@@ -121,7 +173,7 @@ public void generateReservoirItemsSketchLongSampling() throws IOException {
121173
final ReservoirItemsSketch<Long> sk = ReservoirItemsSketch.newInstance(
122174
predeterminedSamples,
123175
n,
124-
org.apache.datasketches.common.ResizeFactor.X8,
176+
ResizeFactor.X8,
125177
k
126178
);
127179

@@ -168,7 +220,7 @@ public void generateReservoirItemsSketchDoubleSampling() throws IOException {
168220
final ReservoirItemsSketch<Double> sk = ReservoirItemsSketch.newInstance(
169221
predeterminedSamples,
170222
n,
171-
org.apache.datasketches.common.ResizeFactor.X8,
223+
ResizeFactor.X8,
172224
k
173225
);
174226

@@ -215,12 +267,162 @@ public void generateReservoirItemsSketchStringSampling() throws IOException {
215267
final ReservoirItemsSketch<String> sk = ReservoirItemsSketch.newInstance(
216268
predeterminedSamples,
217269
n,
218-
org.apache.datasketches.common.ResizeFactor.X8,
270+
ResizeFactor.X8,
219271
k
220272
);
221273

222274
Files.newOutputStream(javaPath.resolve("reservoir_items_string_sampling_n" + n + "_k" + k + "_java.sk"))
223275
.write(sk.toByteArray(new ArrayOfStringsSerDe()));
224276
}
225277
}
278+
279+
@Test(groups = {GENERATE_JAVA_FILES})
280+
public void generateReservoirItemsUnionLongEmpty() throws IOException {
281+
int maxK = 128;
282+
ReservoirItemsUnion<Long> union = ReservoirItemsUnion.newInstance(maxK);
283+
284+
Files.newOutputStream(javaPath.resolve("reservoir_items_union_long_empty_maxk" + maxK + "_java.sk"))
285+
.write(union.toByteArray(new ArrayOfLongsSerDe()));
286+
}
287+
288+
@Test(groups = {GENERATE_JAVA_FILES})
289+
public void generateReservoirItemsUnionLongExact() throws IOException {
290+
int maxK = 128;
291+
int[] nArr = {1, 10, 32, 100, 128};
292+
293+
for (int n : nArr) {
294+
ReservoirItemsUnion<Long> union = ReservoirItemsUnion.newInstance(maxK);
295+
for (int i = 0; i < n; i++) {
296+
union.update((long) i);
297+
}
298+
Files.newOutputStream(javaPath.resolve("reservoir_items_union_long_exact_n" + n + "_maxk" + maxK + "_java.sk"))
299+
.write(union.toByteArray(new ArrayOfLongsSerDe()));
300+
}
301+
}
302+
303+
@Test(groups = {GENERATE_JAVA_FILES})
304+
public void generateReservoirItemsUnionLongSampling() throws IOException {
305+
int[] maxKArr = {32, 64, 128};
306+
long n = 1000;
307+
308+
for (int maxK : maxKArr) {
309+
ArrayList<Long> predeterminedSamples = new ArrayList<>();
310+
for (int i = 0; i < maxK; i++) {
311+
predeterminedSamples.add((long) (i * 2));
312+
}
313+
314+
ReservoirItemsSketch<Long> sk = ReservoirItemsSketch.newInstance(
315+
predeterminedSamples,
316+
n,
317+
ResizeFactor.X8,
318+
maxK
319+
);
320+
321+
ReservoirItemsUnion<Long> union = ReservoirItemsUnion.newInstance(maxK);
322+
union.update(sk);
323+
324+
Files.newOutputStream(javaPath.resolve("reservoir_items_union_long_sampling_n" + n + "_maxk" + maxK + "_java.sk"))
325+
.write(union.toByteArray(new ArrayOfLongsSerDe()));
326+
}
327+
}
328+
329+
@Test(groups = {GENERATE_JAVA_FILES})
330+
public void generateReservoirItemsUnionDoubleEmpty() throws IOException {
331+
int maxK = 128;
332+
ReservoirItemsUnion<Double> union = ReservoirItemsUnion.newInstance(maxK);
333+
334+
Files.newOutputStream(javaPath.resolve("reservoir_items_union_double_empty_maxk" + maxK + "_java.sk"))
335+
.write(union.toByteArray(new ArrayOfDoublesSerDe()));
336+
}
337+
338+
@Test(groups = {GENERATE_JAVA_FILES})
339+
public void generateReservoirItemsUnionDoubleExact() throws IOException {
340+
int maxK = 128;
341+
int[] nArr = {1, 10, 32, 100, 128};
342+
343+
for (int n : nArr) {
344+
ReservoirItemsUnion<Double> union = ReservoirItemsUnion.newInstance(maxK);
345+
for (int i = 0; i < n; i++) {
346+
union.update((double) i);
347+
}
348+
Files.newOutputStream(javaPath.resolve("reservoir_items_union_double_exact_n" + n + "_maxk" + maxK + "_java.sk"))
349+
.write(union.toByteArray(new ArrayOfDoublesSerDe()));
350+
}
351+
}
352+
353+
@Test(groups = {GENERATE_JAVA_FILES})
354+
public void generateReservoirItemsUnionDoubleSampling() throws IOException {
355+
int[] maxKArr = {32, 64, 128};
356+
long n = 1000;
357+
358+
for (int maxK : maxKArr) {
359+
ArrayList<Double> predeterminedSamples = new ArrayList<>();
360+
for (int i = 0; i < maxK; i++) {
361+
predeterminedSamples.add((double) (i * 2));
362+
}
363+
364+
ReservoirItemsSketch<Double> sk = ReservoirItemsSketch.newInstance(
365+
predeterminedSamples,
366+
n,
367+
ResizeFactor.X8,
368+
maxK
369+
);
370+
371+
ReservoirItemsUnion<Double> union = ReservoirItemsUnion.newInstance(maxK);
372+
union.update(sk);
373+
374+
Files.newOutputStream(javaPath.resolve("reservoir_items_union_double_sampling_n" + n + "_maxk" + maxK + "_java.sk"))
375+
.write(union.toByteArray(new ArrayOfDoublesSerDe()));
376+
}
377+
}
378+
379+
@Test(groups = {GENERATE_JAVA_FILES})
380+
public void generateReservoirItemsUnionStringEmpty() throws IOException {
381+
int maxK = 128;
382+
ReservoirItemsUnion<String> union = ReservoirItemsUnion.newInstance(maxK);
383+
384+
Files.newOutputStream(javaPath.resolve("reservoir_items_union_string_empty_maxk" + maxK + "_java.sk"))
385+
.write(union.toByteArray(new ArrayOfStringsSerDe()));
386+
}
387+
388+
@Test(groups = {GENERATE_JAVA_FILES})
389+
public void generateReservoirItemsUnionStringExact() throws IOException {
390+
int maxK = 128;
391+
int[] nArr = {1, 10, 32, 100, 128};
392+
393+
for (int n : nArr) {
394+
ReservoirItemsUnion<String> union = ReservoirItemsUnion.newInstance(maxK);
395+
for (int i = 0; i < n; i++) {
396+
union.update("item" + i);
397+
}
398+
Files.newOutputStream(javaPath.resolve("reservoir_items_union_string_exact_n" + n + "_maxk" + maxK + "_java.sk"))
399+
.write(union.toByteArray(new ArrayOfStringsSerDe()));
400+
}
401+
}
402+
403+
@Test(groups = {GENERATE_JAVA_FILES})
404+
public void generateReservoirItemsUnionStringSampling() throws IOException {
405+
int[] maxKArr = {32, 64, 128};
406+
long n = 1000;
407+
408+
for (int maxK : maxKArr) {
409+
ArrayList<String> predeterminedSamples = new ArrayList<>();
410+
for (int i = 0; i < maxK; i++) {
411+
predeterminedSamples.add("item" + (i * 2));
412+
}
413+
414+
ReservoirItemsSketch<String> sk = ReservoirItemsSketch.newInstance(
415+
predeterminedSamples,
416+
n,
417+
ResizeFactor.X8,
418+
maxK
419+
);
420+
421+
ReservoirItemsUnion<String> union = ReservoirItemsUnion.newInstance(maxK);
422+
union.update(sk);
423+
424+
Files.newOutputStream(javaPath.resolve("reservoir_items_union_string_sampling_n" + n + "_maxk" + maxK + "_java.sk"))
425+
.write(union.toByteArray(new ArrayOfStringsSerDe()));
426+
}
427+
}
226428
}

0 commit comments

Comments
 (0)